### without dlt
- Easy memory management since the API returns data in small pages or events
- Low throughput because data transfer is limited by API constraints (rate limits, response time)

In [None]:
# Request page by page until hitting an empty page
import requests

BASE_API_URL = "https://us-central1-dlthub-analytics.cloudfunctions.net/data_engineering_zoomcamp_api"

In [None]:
# manually paginate data into pages
def paginated_getter():
    """Yield the API's records one page at a time.

    Requests ``BASE_API_URL`` with an increasing ``page`` query parameter,
    starting at 1, and yields each decoded JSON page until the API returns
    an empty page.

    Yields:
        The decoded JSON body of each non-empty page.

    Raises:
        requests.HTTPError: If a response carries an error status code.
        requests.Timeout: If the server does not respond within the timeout.
    """
    page_number = 1
    while True:
        params = {'page': page_number}
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection and the generator would never make progress.
        response = requests.get(BASE_API_URL, params=params, timeout=30)
        response.raise_for_status()
        page_json = response.json()
        print(f'Got page {page_number} with {len(page_json)} records')

        # An empty page marks the end of the data.
        if not page_json:
            break

        yield page_json
        page_number += 1

In [None]:
# Consume the generator: each page is fetched and printed before the next one is requested
for page_data in paginated_getter():
    print(page_data)

### with dlt

- No manual pagination – dlt automatically fetches all pages of data
- Low memory usage – Streams data chunk by chunk, avoiding RAM overflows
- Handles rate limits & retries – Ensures requests are sent efficiently without failures
- Flexible destination support (databases, warehouses, or data lakes)

In [None]:
import dlt
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator

In [None]:
# paginate data into pages (automatically)
def paginated_getter():
    """Stream pages of results from the API through dlt's REST client.

    Pagination is delegated to a ``RESTClient`` configured with a
    ``PageNumberPaginator``; every page the client produces is yielded in turn.
    """
    # Page-based pagination: pages are numbered (1, 2, 3, ...)
    page_numbering = PageNumberPaginator(
        base_page=1,  # numbering starts at page 1
        total_path=None,  # the API provides no total page count; pagination should stop when a page contains no result items
    )
    api_client = RESTClient(
        base_url="https://us-central1-dlthub-analytics.cloudfunctions.net",
        paginator=page_numbering,
    )

    # Endpoint that serves the taxi ride data
    endpoint = "data_engineering_zoomcamp_api"

    # Hand pages over one at a time instead of collecting them all in memory
    for records in api_client.paginate(endpoint):
        yield records

In [None]:
# Iterate over the generator and print each page as it is yielded
for page_data in paginated_getter():
    print(page_data)