From a33bccc40966b1016e58071dc4e204224f3b3aa1 Mon Sep 17 00:00:00 2001
From: Jack Kelly
Date: Thu, 18 Nov 2021 09:00:47 +0000
Subject: [PATCH 1/2] exists is not a property

---
 .../data_sources/satellite/satellite_data_source.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
index b742dda4..1809664c 100644
--- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
+++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
@@ -238,7 +238,7 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
     # seems to slow things down a lot if the Zarr store has more than
     # about a million chunks.
     # See https://github.com/openclimatefix/nowcasting_dataset/issues/23
-    if Path(zarr_path).exists:
+    if Path(zarr_path).exists():
         # For opening a single Zarr store, we can use the simpler open_dataset
         dataset = xr.open_dataset(
             zarr_path, engine="zarr", consolidated=consolidated, mode="r", chunks=None

From c0a691f05e56abbb4efe0adb3ce703dab37e9c5f Mon Sep 17 00:00:00 2001
From: Jack Kelly
Date: Thu, 18 Nov 2021 09:04:28 +0000
Subject: [PATCH 2/2] Always use open_mfdataset

---
 .../satellite/satellite_data_source.py | 31 +++++++++----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
index 1809664c..3f54f8b6 100644
--- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
+++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py
@@ -2,7 +2,6 @@
 import logging
 from dataclasses import InitVar, dataclass
 from numbers import Number
-from pathlib import Path
 from typing import Iterable, Optional
 
 import numpy as np
@@ -238,22 +237,20 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
     # seems to slow things down a lot if the Zarr store has more than
     # about a million chunks.
     # See https://github.com/openclimatefix/nowcasting_dataset/issues/23
-    if Path(zarr_path).exists():
-        # For opening a single Zarr store, we can use the simpler open_dataset
-        dataset = xr.open_dataset(
-            zarr_path, engine="zarr", consolidated=consolidated, mode="r", chunks=None
-        )
-    else:
-        # If we are opening multiple Zarr stores (i.e. one for each month of the year) we load them
-        # together and create a single dataset from them
-        dataset = xr.open_mfdataset(
-            zarr_path,
-            chunks=None,
-            mode="r",
-            engine="zarr",
-            concat_dim="time",
-            preprocess=remove_acq_time_from_dataset,
-        )
+
+    # If we are opening multiple Zarr stores (i.e. one for each month of the year) we load them
+    # together and create a single dataset from them. open_mfdataset also works if zarr_path
+    # points to a specific zarr directory (with no wildcards).
+    dataset = xr.open_mfdataset(
+        zarr_path,
+        chunks=None,
+        mode="r",
+        engine="zarr",
+        concat_dim="time",
+        preprocess=remove_acq_time_from_dataset,
+        consolidated=consolidated,
+        combine="nested",
+    )
     data_array = dataset["stacked_eumetsat_data"]
     del dataset
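
Background on the pair of patches: Path.exists without parentheses evaluates to the bound method object, which is always truthy, so the open_dataset branch ran even when zarr_path did not point to an existing single store (for example, when it was a wildcard pattern covering several monthly Zarr stores). The sketch below illustrates the pitfall; the path in it is made up purely for illustration.

    from pathlib import Path

    path = Path("/no/such/zarr/store")  # hypothetical path, for illustration only

    # Without the call, Python tests the truthiness of the bound method itself,
    # so this branch is always taken even though the path does not exist.
    if path.exists:
        print("bool(path.exists) =", bool(path.exists))  # True

    # Calling the method returns the real answer.
    print("path.exists() =", path.exists())  # False

The second patch then removes the check (and the now-unused pathlib import) altogether because, as its new comment notes, xr.open_mfdataset handles both a wildcard path and a single Zarr store.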