diff --git a/api-reference/python/tilebox.datasets/Collection.find.mdx b/api-reference/python/tilebox.datasets/Collection.find.mdx index 3c8e39b..654c4d4 100644 --- a/api-reference/python/tilebox.datasets/Collection.find.mdx +++ b/api-reference/python/tilebox.datasets/Collection.find.mdx @@ -19,7 +19,7 @@ Find a specific datapoint in a collection by its id. - Whether to skip loading the data for the datapoint. If `True`, only the metadata for the datapoint is loaded. + If `True`, the response contains only the ID and the timestamp for the datapoint. Defaults to `False`. ## Returns @@ -38,7 +38,17 @@ Since it returns only a single data point, the output xarray dataset does not in ```python Python data = collection.find( "0186d6b6-66cc-fcfd-91df-bbbff72499c3", - skip_data = False, ) + + +# check if a datapoint exists +try: + collection.find( + "0186d6b6-66cc-fcfd-91df-bbbff72499c3", + skip_data=True, + ) + exists = True +except NotFoundError: + exists = False ``` diff --git a/api-reference/python/tilebox.datasets/Collection.query.mdx b/api-reference/python/tilebox.datasets/Collection.query.mdx index c5feb42..e6f7a69 100644 --- a/api-reference/python/tilebox.datasets/Collection.query.mdx +++ b/api-reference/python/tilebox.datasets/Collection.query.mdx @@ -32,7 +32,7 @@ If no data exists for the requested time or interval, an empty `xarray.Dataset` - If `True`, the response contains only the [required fields for the dataset type](/datasets/types/timeseries) without the actual dataset-specific fields. Defaults to `False`. + If `True`, the response contains only the ID and the timestamp for each datapoint. Defaults to `False`. 
@@ -54,7 +54,13 @@ data = collection.query(temporal_extent=time) # querying a time interval interval = ("2023-05-01", "2023-08-01") -data = collection.query(temporal_extent=interval, show_progress=True) +data = collection.query(temporal_extent=interval) + +# displaying a progress bar while querying +data = collection.query( + temporal_extent=interval, + show_progress=True, +) # querying a time interval with TimeInterval interval = TimeInterval( @@ -63,11 +69,13 @@ interval = TimeInterval( start_exclusive=False, end_inclusive=False, ) -data = collection.query(temporal_extent=interval, show_progress=True) +data = collection.query(temporal_extent=interval) # querying with an iterable -meta_data = collection.query(temporal_extent=..., skip_data=True) -first_50 = collection.query(temporal_extent=meta_data.time[:50], skip_data=False) +datapoints = collection.query( + temporal_extent=interval, + skip_data=True, # only fetch datapoint IDs and time +) +first_50 = collection.query(temporal_extent=datapoints.time[:50]) ``` - diff --git a/datasets/delete.mdx b/datasets/delete.mdx index 9582375..1bc263d 100644 --- a/datasets/delete.mdx +++ b/datasets/delete.mdx @@ -89,15 +89,15 @@ Deleted 2 data points. ## Deleting a time interval -One common way to delete data is to first load it from a collection and then forward it to the `delete` method. For -this use case it often is a good idea to query the datapoints with `skip_data=True` to avoid loading the data fields, -since you only need the datapoint IDs. See [fetching only metadata](/datasets/query#fetching-only-metadata) for more details. +One common way to delete all datapoints in a time interval is to first query them from a collection and then delete the +datapoints found. For this use case it is often a good idea to query the datapoints with `skip_data=True` to avoid actually +loading the data fields, since only the datapoint IDs are required. 
See [skipping data fields](/datasets/query#skipping-data-fields) for more details. ```python Python to_delete = collection.query(temporal_extent=("2023-05-01", "2023-06-01"), skip_data=True) -n_deleted = collection.delete(datapoints) +n_deleted = collection.delete(to_delete) print(f"Deleted {n_deleted} data points.") ``` ```go Go diff --git a/datasets/query.mdx b/datasets/query.mdx index 44ec4cf..b5a6205 100644 --- a/datasets/query.mdx +++ b/datasets/query.mdx @@ -274,9 +274,9 @@ You can specify a time interval by using an iterable of `TimeScalar`s as the `te ```python Python interval = ("2017-01-01", "2023-01-01") - meta_data = collection.query(temporal_extent=interval, skip_data=True) + found_datapoints = collection.query(temporal_extent=interval, skip_data=True) - first_50_data_points = collection.query(temporal_extent=meta_data.time[:50], skip_data=False) + first_50_data_points = collection.query(temporal_extent=found_datapoints.time[:50]) print(first_50_data_points) ``` @@ -423,19 +423,23 @@ if err != nil { ``` -## Fetching only metadata +## Skipping data fields -Sometimes, it may be useful to load only dataset metadata fields without the actual data fields. This can be done by setting the `skip_data` parameter to `True`. -For example, when only checking if a datapoint exists, you may want to use `skip_data=True` to avoid loading the data fields. -If this flag is set, the response will only include the required fields for the given dataset type, but no custom data fields. +Sometimes, only the ID or timestamp associated with a datapoint is required. In this case, loading the full data fields for each datapoint is not necessary and can be avoided by +setting the `skip_data` parameter to `True`. + +For example, when only checking how many datapoints exist in a given time interval, you can use `skip_data=True` to avoid loading the data fields. 
```python Python - data = collection.query(temporal_extent="2024-08-01 00:00:01.362", skip_data=True) - print(data) + interval = ("2023-01-01", "2023-02-01") + data = collection.query(temporal_extent=interval, skip_data=True) + print(f"Found {data.sizes['time']} data points.") ``` ```go Go -temporalExtent := query.NewPointInTime(time.Date(2024, time.August, 1, 0, 0, 1, 362000000, time.UTC)) +startDate := time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC) +endDate := time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC) +interval := query.NewTimeInterval(startDate, endDate) var datapoints []*v1.Sentinel1Sar err = client.Datapoints.QueryInto(ctx, @@ -592,10 +596,15 @@ Data variables: (12/30) - You can also set the `skip_data` parameter when calling `find` to query only the required fields of the data point, same as for `load`. + You can also set the `skip_data` parameter when calling `find` to query only the required fields of the data point, same as for `query`. ## Automatic pagination Querying large time intervals can return a large number of data points. Tilebox automatically handles pagination for you by sending paginated requests to the server. + + +When using the Python SDK in an interactive notebook environment, you can also display a +progress bar to track the progress of the query by setting the `show_progress` parameter to `True`. +