diff --git a/api-reference/python/tilebox.datasets/Collection.find.mdx b/api-reference/python/tilebox.datasets/Collection.find.mdx
index 3c8e39b..654c4d4 100644
--- a/api-reference/python/tilebox.datasets/Collection.find.mdx
+++ b/api-reference/python/tilebox.datasets/Collection.find.mdx
@@ -19,7 +19,7 @@ Find a specific datapoint in a collection by its id.
- Whether to skip loading the data for the datapoint. If `True`, only the metadata for the datapoint is loaded.
+ If `True`, the response contains only the ID and the timestamp for the datapoint. Defaults to `False`.
## Returns
@@ -38,7 +38,17 @@ Since it returns only a single data point, the output xarray dataset does not in
```python Python
data = collection.find(
"0186d6b6-66cc-fcfd-91df-bbbff72499c3",
- skip_data = False,
)
+
+
+# check if a datapoint exists
+try:
+ collection.find(
+ "0186d6b6-66cc-fcfd-91df-bbbff72499c3",
+ skip_data=True,
+ )
+ exists = True
+except NotFoundError:
+ exists = False
```
diff --git a/api-reference/python/tilebox.datasets/Collection.query.mdx b/api-reference/python/tilebox.datasets/Collection.query.mdx
index c5feb42..e6f7a69 100644
--- a/api-reference/python/tilebox.datasets/Collection.query.mdx
+++ b/api-reference/python/tilebox.datasets/Collection.query.mdx
@@ -32,7 +32,7 @@ If no data exists for the requested time or interval, an empty `xarray.Dataset`
- If `True`, the response contains only the [required fields for the dataset type](/datasets/types/timeseries) without the actual dataset-specific fields. Defaults to `False`.
+ If `True`, the response contains only the ID and the timestamp for each datapoint. Defaults to `False`.
@@ -54,7 +54,13 @@ data = collection.query(temporal_extent=time)
# querying a time interval
interval = ("2023-05-01", "2023-08-01")
-data = collection.query(temporal_extent=interval, show_progress=True)
+data = collection.query(temporal_extent=interval)
+
+# displaying a progress bar while querying
+data = collection.query(
+ temporal_extent=interval,
+ show_progress=True,
+)
# querying a time interval with TimeInterval
interval = TimeInterval(
@@ -63,11 +69,13 @@ interval = TimeInterval(
start_exclusive=False,
end_inclusive=False,
)
-data = collection.query(temporal_extent=interval, show_progress=True)
+data = collection.query(temporal_extent=interval)
# querying with an iterable
-meta_data = collection.query(temporal_extent=..., skip_data=True)
-first_50 = collection.query(temporal_extent=meta_data.time[:50], skip_data=False)
+datapoints = collection.query(
+ temporal_extent=interval,
+ skip_data=True, # only fetch datapoint IDs and time
+)
+first_50 = collection.query(temporal_extent=datapoints.time[:50])
```
-
diff --git a/datasets/delete.mdx b/datasets/delete.mdx
index 9582375..1bc263d 100644
--- a/datasets/delete.mdx
+++ b/datasets/delete.mdx
@@ -89,15 +89,15 @@ Deleted 2 data points.
## Deleting a time interval
-One common way to delete data is to first load it from a collection and then forward it to the `delete` method. For
-this use case it often is a good idea to query the datapoints with `skip_data=True` to avoid loading the data fields,
-since you only need the datapoint IDs. See [fetching only metadata](/datasets/query#fetching-only-metadata) for more details.
+One common way to delete all datapoints in a time interval is to first query them from a collection and then delete the
+found datapoints. For this use case, it is often a good idea to query the datapoints with `skip_data=True` to avoid
+loading the data fields, since only the datapoint IDs are required. See [skipping data fields](/datasets/query#skipping-data-fields) for more details.
```python Python
to_delete = collection.query(temporal_extent=("2023-05-01", "2023-06-01"), skip_data=True)
-n_deleted = collection.delete(datapoints)
+n_deleted = collection.delete(to_delete)
print(f"Deleted {n_deleted} data points.")
```
```go Go
diff --git a/datasets/query.mdx b/datasets/query.mdx
index 44ec4cf..b5a6205 100644
--- a/datasets/query.mdx
+++ b/datasets/query.mdx
@@ -274,9 +274,9 @@ You can specify a time interval by using an iterable of `TimeScalar`s as the `te
```python Python
interval = ("2017-01-01", "2023-01-01")
- meta_data = collection.query(temporal_extent=interval, skip_data=True)
+ found_datapoints = collection.query(temporal_extent=interval, skip_data=True)
- first_50_data_points = collection.query(temporal_extent=meta_data.time[:50], skip_data=False)
+ first_50_data_points = collection.query(temporal_extent=found_datapoints.time[:50])
print(first_50_data_points)
```
@@ -423,19 +423,23 @@ if err != nil {
```
-## Fetching only metadata
+## Skipping data fields
-Sometimes, it may be useful to load only dataset metadata fields without the actual data fields. This can be done by setting the `skip_data` parameter to `True`.
-For example, when only checking if a datapoint exists, you may want to use `skip_data=True` to avoid loading the data fields.
-If this flag is set, the response will only include the required fields for the given dataset type, but no custom data fields.
+Sometimes, only the ID or timestamp associated with a datapoint is required. In this case, loading the full data fields for each datapoint is not necessary and can be avoided by
+setting the `skip_data` parameter to `True`.
+
+For example, when only checking how many datapoints exist in a given time interval, you can use `skip_data=True` to avoid loading the data fields.
```python Python
- data = collection.query(temporal_extent="2024-08-01 00:00:01.362", skip_data=True)
- print(data)
+ interval = ("2023-01-01", "2023-02-01")
+ data = collection.query(temporal_extent=interval, skip_data=True)
+ print(f"Found {data.sizes['time']} data points.")
```
```go Go
-temporalExtent := query.NewPointInTime(time.Date(2024, time.August, 1, 0, 0, 1, 362000000, time.UTC))
+startDate := time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC)
+endDate := time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC)
+interval := query.NewTimeInterval(startDate, endDate)
var datapoints []*v1.Sentinel1Sar
err = client.Datapoints.QueryInto(ctx,
@@ -592,10 +596,15 @@ Data variables: (12/30)
- You can also set the `skip_data` parameter when calling `find` to query only the required fields of the data point, same as for `load`.
+ You can also set the `skip_data` parameter when calling `find` to load only the ID and the timestamp of the data point, same as for `query`.
## Automatic pagination
Querying large time intervals can return a large number of data points.
Tilebox automatically handles pagination for you by sending paginated requests to the server.
+
+
+When using the Python SDK in an interactive notebook environment, you can also display a
+progress bar to track the progress of the query by setting the `show_progress` parameter to `True`.
+