From 5808a4fbec794ffa0c58609af72895ff0540bbcc Mon Sep 17 00:00:00 2001
From: Gabriel Blanchard <Gabriel.Blanchard@Scale.com>
Date: Mon, 17 Mar 2025 15:09:44 -0400
Subject: [PATCH 1/2] Add page_size option for
 items_and_annotation_generator. The default of 10,000 items per page remains,
 but callers can now request smaller pages to reduce timeout errors.

---
 nucleus/dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nucleus/dataset.py b/nucleus/dataset.py
index 26fa57e7..ea95f840 100644
--- a/nucleus/dataset.py
+++ b/nucleus/dataset.py
@@ -1518,6 +1518,7 @@ def items_and_annotation_generator(
         query: Optional[str] = None,
         use_mirrored_images: bool = False,
         only_most_recent_tasks: bool = True,
+        page_size=10000
     ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
         """Provides a generator of all DatasetItems and Annotations in the dataset.
 
@@ -1525,6 +1526,7 @@ def items_and_annotation_generator(
             query: Structured query compatible with the `Nucleus query language <https://nucleus.scale.com/docs/query-language-reference>`_.
             use_mirrored_images: If True, returns the location of the mirrored image hosted in Scale S3. Useful when the original image is no longer available.
             only_most_recent_tasks: If True, only the annotations corresponding to the most recent task for each item is returned.
+            page_size: Number of items to fetch per page. Defaults to 10000, the maximum ES page size. Use a smaller value to reduce timeout errors on large datasets.
 
         Returns:
             Generator where each element is a dict containing the DatasetItem
@@ -1548,7 +1550,7 @@ def items_and_annotation_generator(
             client=self._client,
             endpoint=f"dataset/{self.id}/exportForTrainingPage",
             result_key=EXPORT_FOR_TRAINING_KEY,
-            page_size=10000,  # max ES page size
+            page_size=page_size,  # default is max ES page size of 10000
             query=query,
             chip=use_mirrored_images,
             onlyMostRecentTask=only_most_recent_tasks,

From 44a58bd36adda252a013c16c533465b32aa3b3a7 Mon Sep 17 00:00:00 2001
From: Gabriel Blanchard <Gabriel.Blanchard@Scale.com>
Date: Wed, 19 Mar 2025 10:58:08 -0400
Subject: [PATCH 2/2] Bump package version to 0.17.10; update changelog and
 pyproject.toml

---
 CHANGELOG.md   | 6 ++++++
 pyproject.toml | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index caf3b741..d8f6abd6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [0.17.10](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.10) - 2025-03-19
+
+### Added
+- Added `page_size` parameter to `items_and_annotation_generator()` to reduce timeout errors for customers with large datasets
+
 ## [0.17.9](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.9) - 2025-03-11
 
 ### Added
diff --git a/pyproject.toml b/pyproject.toml
index aa169d82..8fe8503e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"]  # Easy ignore for getting it running
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.17.9"
+version = "0.17.10"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]