Commit 9f59b22

Added a batch_size parameter to executemany() to let it operate on data
in batches.

1 parent 168c39d commit 9f59b22

22 files changed: +697 −147 lines

doc/src/api_manual/async_cursor.rst
Lines changed: 4 additions & 0 deletions

@@ -77,6 +77,10 @@ AsyncCursor Methods

 .. automethod:: AsyncCursor.executemany

+    .. versionchanged:: 3.4.0
+
+        The ``batch_size`` parameter was added.
+
     .. versionchanged:: 3.3.0

         Added support for passing data frames in the ``parameters`` parameter.

doc/src/api_manual/cursor.rst
Lines changed: 4 additions & 0 deletions

@@ -83,6 +83,10 @@ Cursor Methods

 .. automethod:: Cursor.executemany

+    .. versionchanged:: 3.4.0
+
+        The ``batch_size`` parameter was added.
+
     .. versionchanged:: 3.3.0

         Added support for passing data frames in the ``parameters`` parameter.

doc/src/release_notes.rst
Lines changed: 4 additions & 0 deletions

@@ -47,6 +47,10 @@ Common Changes
 #) Added support for types ``date32`` and ``date64`` when ingesting data
    frames supporting the Arrow PyCapsule interface as requested
    (`issue 535 <https://github.com/oracle/python-oracledb/issues/535>`__).
+#) Added a ``batch_size`` parameter to :meth:`Cursor.executemany()` and
+   :meth:`AsyncCursor.executemany()` to let these methods operate on data in
+   batches.
+#) Data frames with multiple chunks are now supported.
 #) Added ``fetch_lobs`` and ``fetch_decimals`` parameters where applicable to
    the methods used for fetching rows or data frames from the database. Note
    that for the creation of pipeline operations, if these parameters are not
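
For context on the ``date32``/``date64`` item, a minimal ingestion sketch using pyarrow as the PyCapsule producer; the ``events`` table, the open ``cursor``/``connection``, and pyarrow itself are assumptions, not part of this commit:

    # Sketch only: assumes "create table events (id number, event_date date)"
    # and an open cursor/connection.
    import datetime

    import pyarrow as pa

    table = pa.table({
        "id": [1, 2],
        "event_date": pa.array(
            [datetime.date(2025, 1, 1), datetime.date(2025, 6, 30)],
            type=pa.date32(),  # Arrow 32-bit days-since-epoch dates
        ),
    })

    cursor.executemany("insert into events values (:1, :2)", table)
    connection.commit()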

doc/src/user_guide/batch_statement.rst
Lines changed: 35 additions & 8 deletions

@@ -70,12 +70,7 @@ Each tuple value maps to one of the bind variable placeholders.

 This code requires only one :ref:`round-trip <roundtrips>` from the client to
 the database instead of the five round-trips that would be required for
-repeated calls to :meth:`~Cursor.execute()`. For very large data sets, there
-may be an external buffer or network limits to how many rows can be processed,
-so repeated calls to ``executemany()`` may be required. The limits are based
-on both the number of rows being processed as well as the "size" of each row
-that is being processed. Repeated calls to :meth:`~Cursor.executemany()` are
-still better than repeated calls to :meth:`~Cursor.execute()`.
+repeated calls to :meth:`~Cursor.execute()`.

 To insert a single column, make sure the bind variables are correctly created
 as tuples, for example:

@@ -173,6 +168,38 @@ With named bind variables, use named parameters when calling
             values (:pid, :pdesc)""", data)


+Batching of Large Datasets
+--------------------------
+
+For very large data sets, there may be a buffer or network limit on how many
+rows can be processed. The limit is based on both the number of records as
+well as the size of each record that is being processed. In other cases, it may
+be faster to process smaller sets of records.
+
+To reduce the data sizes involved, you can either make repeated calls to
+:meth:`~Cursor.executemany()` as shown later in the CSV examples, or you can
+use the ``batch_size`` parameter to optimize transfer across the network to the
+database. For example:
+
+.. code-block:: python
+
+    data = [
+        (1, "Parent 1"),
+        (2, "Parent 2"),
+        . . .
+        (9_999_999, "Parent 9,999,999"),
+        (10_000_000, "Parent 10,000,000"),
+    ]
+
+    cursor.executemany("insert into ParentTable values (:1, :2)", data, batch_size=200_000)
+
+This will send the data to the database in batches of 200,000 records until all
+10,000,000 records have been inserted.
+
+If :attr:`Connection.autocommit` is ``True``, then a commit will take place per
+batch of records processed.
+
+
 .. _batchplsql:

 Batch Execution of PL/SQL

@@ -446,8 +473,8 @@ And the schema:

     create table test (id number, name varchar2(25));

-Data loading can be done in batches of records since the number of records may
-prevent all data being inserted at once:
+Data loading can be done in batches of records since Python memory limitations
+may prevent all the records being held in memory at once:

 .. code-block:: python
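The CSV technique the hunks above refer to is repeated ``executemany()`` calls over fixed-size slices. A minimal sketch against the ``test`` table from this file, assuming an open ``cursor``/``connection`` and a local ``data.csv`` of ``id,name`` rows (the file name and batch size are illustrative):

    # Sketch of the "repeated calls" alternative: flush every BATCH_SIZE
    # records; an open cursor/connection and a data.csv file are assumed.
    import csv

    BATCH_SIZE = 10_000
    sql = "insert into test (id, name) values (:1, :2)"

    with open("data.csv", "r") as f:
        batch = []
        for id_str, name in csv.reader(f):
            batch.append((int(id_str), name))
            if len(batch) == BATCH_SIZE:
                cursor.executemany(sql, batch)  # one round-trip per full batch
                batch = []
        if batch:
            cursor.executemany(sql, batch)  # final partial batch
    connection.commit()
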
src/oracledb/base_impl.pxd
Lines changed: 31 additions & 9 deletions

@@ -43,7 +43,8 @@ from .arrow_impl cimport (
     ArrowTimeUnit,
     ArrowType,
     ArrowArrayImpl,
-    ArrowSchemaImpl
+    ArrowSchemaImpl,
+    DataFrameImpl,
 )

 cdef enum:

@@ -263,6 +264,30 @@ cdef class DefaultsImpl:
 cdef DefaultsImpl C_DEFAULTS


+cdef class BatchLoadManager:
+    cdef:
+        readonly uint32_t num_rows
+        readonly uint64_t message_offset
+        uint64_t offset
+        BaseCursorImpl cursor_impl
+        uint32_t batch_size
+        uint32_t batch_num
+        object type_handler
+        object cursor
+        object conn
+
+    cdef int _calculate_num_rows_in_batch(self, uint64_t total_rows) except -1
+    cdef int _next_batch(self) except -1
+    cdef int _setup_cursor(self) except -1
+    @staticmethod
+    cdef BatchLoadManager create_for_executemany(
+        object cursor,
+        BaseCursorImpl cursor_impl,
+        object parameters,
+        uint32_t batch_size,
+    )
+
+
 cdef class Buffer:
     cdef:
         ssize_t _max_size, _size, _pos
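
The declarations above only fix the interface; as a rough mental model, the manager exposes a sliding window over the bind data. A pure-Python analogue of that windowing (an illustration of the pattern, not the Cython implementation; the ``message_offset`` bookkeeping for the wire protocol is omitted):

    class BatchWindow:
        """Illustrative stand-in for BatchLoadManager's windowing logic."""

        def __init__(self, data, batch_size):
            self.data = data
            self.batch_size = batch_size
            self.offset = 0
            self.num_rows = min(batch_size, len(data))

        def current_batch(self):
            return self.data[self.offset:self.offset + self.num_rows]

        def next_batch(self):
            # slide the window; num_rows drops to 0 when data is exhausted
            self.offset += self.num_rows
            self.num_rows = min(self.batch_size, len(self.data) - self.offset)

    w = BatchWindow(list(range(10)), batch_size=4)
    while w.num_rows > 0:
        print(w.current_batch())  # [0..3], then [4..7], then [8, 9]
        w.next_batch()
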
@@ -689,7 +714,6 @@ cdef class BaseCursorImpl:
                               object params, uint32_t num_rows,
                               uint32_t row_num,
                               bint defer_type_assignment) except -1
-    cdef int _check_binds(self, uint32_t num_execs) except -1
     cdef int _close(self, bint in_del) except -1
     cdef BaseVarImpl _create_fetch_var(self, object conn, object cursor,
                                        object type_handler, bint

@@ -706,10 +730,9 @@ cdef class BaseCursorImpl:
     cdef int _perform_binds(self, object conn, uint32_t num_execs) except -1
     cdef int _prepare(self, str statement, str tag,
                       bint cache_statement) except -1
-    cdef int _reset_bind_vars(self, uint32_t num_rows) except -1
+    cdef int _reset_bind_vars(self, uint64_t array_offset,
+                              uint32_t num_rows) except -1
     cdef int _verify_var(self, object var) except -1
-    cdef object bind_arrow_arrays(self, object cursor, list arrays)
-    cdef int bind_many(self, object cursor, list parameters) except -1
     cdef int bind_one(self, object cursor, object parameters) except -1
     cdef object _finish_building_arrow_arrays(self)
     cdef int _create_arrow_arrays(self) except -1

@@ -749,7 +772,8 @@ cdef class BaseVarImpl:
     cdef DbType _get_adjusted_type(self, uint8_t ora_type_num)
     cdef list _get_array_value(self)
     cdef object _get_scalar_value(self, uint32_t pos)
-    cdef int _on_reset_bind(self, uint32_t num_rows) except -1
+    cdef int _on_reset_bind(self, uint64_t array_offset,
+                            uint32_t num_rows) except -1
     cdef int _resize(self, uint32_t new_size) except -1
     cdef int _set_metadata_from_type(self, object typ) except -1
     cdef int _set_metadata_from_value(self, object value,

@@ -857,9 +881,6 @@ cdef class BindVar:
         ssize_t pos
         bint has_value

-    cdef int _create_var_from_arrow_array(self, object conn,
-                                          BaseCursorImpl cursor_impl,
-                                          ArrowArrayImpl array) except -1
     cdef int _create_var_from_type(self, object conn,
                                    BaseCursorImpl cursor_impl,
                                    object value) except -1

@@ -907,6 +928,7 @@ cdef class PipelineOpImpl:
         readonly uint8_t op_type
         readonly bint fetch_lobs
         readonly bint fetch_decimals
+        BatchLoadManager batch_load_manager
         uint32_t num_execs


src/oracledb/base_impl.pyx
Lines changed: 3 additions & 0 deletions

@@ -82,6 +82,8 @@ from .arrow_impl cimport (
 import array

 import base64
+import collections
+import copy
 import copy
 import datetime
 import decimal

@@ -169,6 +171,7 @@ include "impl/base/pool.pyx"
 include "impl/base/cursor.pyx"
 include "impl/base/var.pyx"
 include "impl/base/bind_var.pyx"
+include "impl/base/batch_load_manager.pyx"
 include "impl/base/dbobject.pyx"
 include "impl/base/lob.pyx"
 include "impl/base/soda.pyx"

src/oracledb/cursor.py
Lines changed: 37 additions & 8 deletions

@@ -851,9 +851,11 @@ def executemany(
         self,
         statement: Optional[str],
         parameters: Any,
+        *,
         batcherrors: bool = False,
         arraydmlrowcounts: bool = False,
         suspend_on_success: bool = False,
+        batch_size: int = 2**32 - 1,
     ) -> None:
         """
         Executes a SQL statement once using all bind value mappings or

@@ -900,21 +902,35 @@
         sessionless transaction will be suspended when ``executemany()``
         completes successfully. See :ref:`suspendtxns`.

+        The ``batch_size`` parameter is used to split large data sets into
+        smaller pieces for sending to the database. It is the number of records
+        in each batch. This parameter can be used to tune performance. When
+        ``Connection.autocommit`` is *True*, a commit will take place for each
+        batch.
+
         For maximum efficiency, it is best to use the :meth:`setinputsizes()`
         method to specify the bind value types and sizes. In particular, if the
         type is not explicitly specified, the value *None* is assumed to be a
         string of length 1 so any values that are later bound as numbers or
         dates will raise a TypeError exception.
         """
         self._verify_open()
-        num_execs = self._impl._prepare_for_executemany(
-            self, self._normalize_statement(statement), parameters
+        manager = self._impl._prepare_for_executemany(
+            self,
+            self._normalize_statement(statement),
+            parameters,
+            batch_size,
         )
         self._impl.suspend_on_success = suspend_on_success
-        if num_execs > 0:
+        while manager.num_rows > 0:
             self._impl.executemany(
-                self, num_execs, bool(batcherrors), bool(arraydmlrowcounts)
+                self,
+                manager.num_rows,
+                batcherrors,
+                arraydmlrowcounts,
+                manager.message_offset,
             )
+            manager.next_batch()

     def fetchall(self) -> list:
         """
@@ -1188,9 +1204,11 @@ async def executemany(
         self,
         statement: Optional[str],
         parameters: Any,
+        *,
         batcherrors: bool = False,
         arraydmlrowcounts: bool = False,
         suspend_on_success: bool = False,
+        batch_size: int = 2**32 - 1,
     ) -> None:
         """
         Executes a SQL statement once using all bind value mappings or

@@ -1236,21 +1254,32 @@
         sessionless transaction will be suspended when ``executemany()``
         completes successfully. See :ref:`suspendtxns`.

+        The ``batch_size`` parameter is used to split large data sets into
+        smaller pieces for sending to the database. It is the number of records
+        in each batch. This parameter can be used to tune performance. When
+        ``Connection.autocommit`` is *True*, a commit will take place for each
+        batch. Do not set ``batch_size`` when ``suspend_on_success`` is *True*.
+
         For maximum efficiency, it is best to use the :meth:`setinputsizes()`
         method to specify the parameter types and sizes ahead of time. In
         particular, the value *None* is assumed to be a string of length 1 so
         any values that are later bound as numbers or dates will raise a
         TypeError exception.
         """
         self._verify_open()
-        num_execs = self._impl._prepare_for_executemany(
-            self, self._normalize_statement(statement), parameters
+        manager = self._impl._prepare_for_executemany(
+            self, self._normalize_statement(statement), parameters, batch_size
         )
         self._impl.suspend_on_success = suspend_on_success
-        if num_execs > 0:
+        while manager.num_rows > 0:
             await self._impl.executemany(
-                self, num_execs, bool(batcherrors), bool(arraydmlrowcounts)
+                self,
+                manager.num_rows,
+                batcherrors,
+                arraydmlrowcounts,
+                manager.message_offset,
             )
+            manager.next_batch()

     async def fetchall(self) -> list:
         """

src/oracledb/impl/arrow/dataframe.pyx
Lines changed: 10 additions & 9 deletions

@@ -62,15 +62,16 @@ cdef class DataFrameImpl:
             df_impl.schema_impls.append(schema_impl)

         # populate list of arrays
-        _check_nanoarrow(arrow_stream.get_next(arrow_stream, &arrow_array))
-        for i in range(arrow_schema.n_children):
-            array_impl = ArrowArrayImpl.__new__(ArrowArrayImpl)
-            array_impl.populate_from_array(df_impl.schema_impls[i],
-                                           arrow_array.children[i])
-            df_impl.arrays.append(array_impl)
-        _check_nanoarrow(arrow_stream.get_next(arrow_stream, &arrow_array))
-        if arrow_array.release != NULL:
-            raise NotImplementedError("multiple chunks not supported")
+        while True:
+            _check_nanoarrow(arrow_stream.get_next(arrow_stream, &arrow_array))
+            if arrow_array.release == NULL:
+                break
+            for i in range(arrow_schema.n_children):
+                array_impl = ArrowArrayImpl.__new__(ArrowArrayImpl)
+                array_impl.populate_from_array(df_impl.schema_impls[i],
+                                               arrow_array.children[i])
+                df_impl.arrays.append(array_impl)
+
         ArrowArrayStreamRelease(arrow_stream)
         return df_impl
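
With the loop rewritten, a stream carrying several record batches is drained chunk by chunk instead of raising ``NotImplementedError``. A sketch using pyarrow as the producer (pyarrow, the target table, and the open cursor are assumptions; any object implementing the Arrow PyCapsule stream interface should behave the same):

    import pyarrow as pa

    # two record batches -> a Table whose columns each have two chunks
    batch1 = pa.RecordBatch.from_pydict({"id": [1, 2], "name": ["Parent 1", "Parent 2"]})
    batch2 = pa.RecordBatch.from_pydict({"id": [3, 4], "name": ["Parent 3", "Parent 4"]})
    table = pa.Table.from_batches([batch1, batch2])

    # previously this raised "multiple chunks not supported"
    cursor.executemany("insert into ParentTable values (:1, :2)", table)
    connection.commit()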
