# Converting NetCDF files from Zenodo to Zarr stores on the persistent LEAP bucket

In [20]:
import xarray as xr
import gcsfs

In [21]:
# Root GCS location; the save cell below writes each dataset to
# <target_bucket>/<subdir>/<name>.zarr.
target_bucket = 'gs://leap-persistent/jbusecke/data/climatebench'
# Filesystem handle used below to build the mappers that the zarr stores are written through.
# NOTE(review): constructed with no arguments, so presumably relies on ambient
# Google Cloud credentials — confirm for the environment this runs in.
fs = gcsfs.GCSFileSystem()

In [22]:
# download the files from zenodo
from pathlib import Path

import pooch

# Maps a subdirectory name to [archive URL, known hash].
# NOTE(review): the hash is None, so pooch does not verify the integrity of the
# downloaded archive. Replace None with the published checksum once confirmed.
link_dict = {
    'train_val':['https://zenodo.org/record/7064308/files/train_val.tar.gz', None],
    'test': ['https://zenodo.org/record/7064308/files/test.tar.gz', None],
}


def _open_netcdfs(paths):
    """Open every NetCDF file in ``paths`` and key it by its file stem.

    Parameters
    ----------
    paths : iterable of str
        Paths of extracted archive members, as returned by ``pooch.Untar``.

    Returns
    -------
    dict
        ``{stem: xarray.Dataset}`` where ``stem`` is the file name without its
        directory and without the trailing ``.nc`` extension.
    """
    # Path(...).stem drops only the final extension and is separator-agnostic,
    # unlike split('/') + replace('.nc', '') which strips '.nc' anywhere in the name.
    # Archive members that are not NetCDF files are skipped rather than opened.
    return {
        Path(path).stem: xr.open_dataset(path)
        for path in paths
        if Path(path).suffix == '.nc'
    }


# Download (or reuse the cached copy of) each archive and unpack it; the Untar
# processor makes retrieve() return the list of extracted file paths.
file_dict = {
    name: pooch.retrieve(url, known_hash=checksum, processor=pooch.Untar())
    for name, (url, checksum) in link_dict.items()
}
# {subdir name: {dataset name: lazily opened dataset}}
dataset_dict = {name: _open_netcdfs(files) for name, files in file_dict.items()}

In [24]:
# save to cloud bucket
# One zarr store per dataset, laid out as <target_bucket>/<subdir>/<file>.zarr.
for subdir, ds_dict in dataset_dict.items():
    print(f"Populating subdir: {subdir}")
    for file, ds in ds_dict.items():
        store = "/".join([target_bucket, subdir, f"{file}.zarr"])
        print(store)
        mapper = fs.get_mapper(store)
        # mode='w' overwrites a store that already exists at this location
        ds.to_zarr(store=mapper, mode='w', consolidated=True)

Populating subdir: train_val
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_ssp370-lowNTCF.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_historical.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_ssp126.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_abrupt-4xCO2.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_ssp370.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_historical.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_hist-GHG.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_piControl.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_hist-GHG.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_ssp370.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/inputs_hist-aer.zarr
gs://leap-persistent/jbusecke/data/climatebench/train_val/outputs_ssp370-lowNTCF.zarr
gs