# Dataset Downloads

This notebook handles the download of all datasets used in this study.


In [1]:
import os
import zipfile

import cdsapi
import pandas as pd
import xarray as xr
from tqdm.std import tqdm

from deeprec.utils import ROOT_DIR, download_file, download_zip

# Root folder for every raw download in this notebook; created on first run
# (including missing parent folders) and echoed so the target is visible.
DL_PATH = ROOT_DIR / "data/raw"
DL_PATH.mkdir(exist_ok=True, parents=True)
print(f"Download path: {DL_PATH}")

Download path: C:\Users\luisg\Repositories\deeprec\data\raw


## Mascons

### JPL Mascons

Global surface mass changes (land + ocean), updated monthly and provided on 0.5-degree global grids ([Dataset description](https://grace.jpl.nasa.gov/data/get-data/jpl_global_mascons/)).

In [6]:
# Date range of the JPL mascon request. The download command passes both
# values through `to_podaac_datetime` before handing them to the CLI.
start_date = "2002-04-04"
end_date = "2024-12-31"

# `dataset` is interpolated into the `-c` option and `outdir` into the `-d`
# option of the `podaac-data-downloader` call in the download cell.
dataset = "TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06.3_V4"
outdir = DL_PATH / "targets/jpl-mascons"

In [3]:
def to_podaac_datetime(date: str) -> str:
    """Format a date as the UTC timestamp string ``YYYY-MM-DDTHH:MM:SSZ``.

    Parameters
    ----------
    date : str
        Anything `pandas.to_datetime` can parse into a single timestamp,
        e.g. ``"2002-04-04"``. Inputs without a time component resolve to
        midnight.

    Returns
    -------
    str
        The timestamp with a literal trailing ``Z``. Timezone-naive inputs
        are formatted as given; timezone-aware inputs are converted to UTC
        first.
    """
    timestamp = pd.to_datetime(date)
    # The trailing "Z" asserts UTC, so an input carrying another offset must
    # be shifted to UTC before formatting; otherwise the string would be wrong.
    if timestamp.tzinfo is not None:
        timestamp = timestamp.tz_convert("UTC")
    return timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")

In [7]:
# Run the `podaac-data-downloader` CLI for `dataset`, writing into `outdir` and
# restricting to `.nc` files between `start_date` and `end_date`.
# `{ ... }` is IPython interpolation of Python expressions into the shell line.
# NOTE(review): `outdir` is interpolated unquoted — presumably this breaks for
# paths containing spaces; confirm before running from such a location.
!(podaac-data-downloader -c { dataset } -d { outdir } -sd { to_podaac_datetime(start_date) } -ed { to_podaac_datetime(end_date) } -e ".nc")

[2024-12-05 11:42:35,128] {podaac_data_downloader.py:317} INFO - Found 1 total files to download
[2024-12-05 11:42:43,625] {podaac_data_downloader.py:358} INFO - 2024-12-05 11:42:43.625576 SUCCESS: https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/TELLUS_GRAC-GRFO_MASCON_CRI_GRID_RL06.3_V4/GRCTellus.JPL.200204_202409.GLO.RL06.3M.MSCNv04CRI.nc
[2024-12-05 11:42:43,625] {podaac_data_downloader.py:369} INFO - Downloaded Files: 1
[2024-12-05 11:42:43,625] {podaac_data_downloader.py:370} INFO - Failed Files:     0
[2024-12-05 11:42:43,625] {podaac_data_downloader.py:371} INFO - Skipped Files:    0
[2024-12-05 11:42:43,625] {podaac_data_downloader.py:177} INFO - Success Count: 1
[2024-12-05 11:42:44,027] {podaac_data_downloader.py:186} INFO - END




### CSR Mascons

[Website](https://www2.csr.utexas.edu/grace/RL06_mascons.html)

In [None]:
# CSR mascon file -> targets/csr-mascons
url = "https://download.csr.utexas.edu/outgoing/grace/RL0603_mascons/CSR_GRACE_GRACE-FO_RL0603_Mascons_all-corrections.nc"
outdir = DL_PATH / "targets/csr-mascons"
download_file(url, outdir)

### GSFC Mascons

[Website](https://earth.gsfc.nasa.gov/geo/data/grace-mascons)


In [4]:
# GSFC mascon file -> targets/gsfc-mascons
url = "https://earth.gsfc.nasa.gov/sites/default/files/geo/gsfc.glb_.200204_202406_rl06v2.0_obp-ice6gd_halfdegree.nc"
outdir = DL_PATH / "targets/gsfc-mascons"
download_file(url, outdir)

## Hydrologic models

### WaterGAP Global Hydrology Model (WGHM) 2.2e

- [Dataset on the Goethe University Data Repository](https://gude.uni-frankfurt.de/entities/researchdata/c53bb505-a620-4860-b2a2-d5a6de74dbd9/details)
- [Preprint of paper on WaterGAP 2.2e](https://doi.org/10.5194/gmd-2023-213)

*Download links might change in the future, currently there is no public API available.*

In [4]:
# One bitstream URL per WaterGAP 2.2e forcing combination; the dict key is
# only used to label the progress bar.
# NOTE(review): every URL ends in `/content` — confirm that `download_file`
# derives distinct file names, otherwise the four downloads overwrite each other.
urls = {
    "20crv-era5": "https://api.gude.uni-frankfurt.de/api/core/bitstreams/07183cd6-9d47-4cb2-bc60-00436b0ecd39/content",
    "gswp3-era5": "https://api.gude.uni-frankfurt.de/api/core/bitstreams/879ce7c3-4d21-4ee1-a83c-e830b13b9d2e/content",
    "20crv-w5e5": "https://api.gude.uni-frankfurt.de/api/core/bitstreams/2b22924a-0981-4f6c-886c-542d19db7783/content",
    "gswp3-w5e5": "https://api.gude.uni-frankfurt.de/api/core/bitstreams/adee0d04-c414-420e-85ef-89d3e83e32e9/content",
}
outdir = DL_PATH / "inputs/watergap22e"

pbar = tqdm(urls.items())
for name, url in pbar:
    # Show which forcing combination is currently being fetched
    pbar.set_postfix_str(f"Downloading watergap22e_{name}")
    download_file(url, outdir)

100%|██████████| 4/4 [00:12<00:00,  3.12s/it, Downloading watergap22e_gswp3-w5e5]


## Weather and climate data

### ERA5

ERA5 data could be combined with the ISIMIP 20CRv3-ERA5 dataset to make up the missing years 2022 and 2023.

- Temporal coverage: 1940 to present
- Temporal frequency:
  - hourly
  - monthly averaged by hour of day (synoptic monthly means)
  - monthly averaged
- Spatial resolution:
  - Reanalysis: 0.25° x 0.25° (atmosphere), 0.5° x 0.5° (ocean waves)
  - Mean, spread and members: 0.5° x 0.5° (atmosphere), 1° x 1° (ocean waves)


Useful links:
- [CDS: ERA5 single levels monthly means](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means)
- [ERA5 data documentation](https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation)
- [How to download ERA5](https://confluence.ecmwf.int/display/CKB/How+to+download+ERA5)

In [2]:
# ERA5 monthly means: one CDS request, and one NetCDF file, per variable.
START_YEAR = 1940
END_YEAR = 2023
VARIABLES = [
    "total_precipitation",
    "2m_temperature",
    "2m_dewpoint_temperature",
    "high_vegetation_cover",
    "low_vegetation_cover",
    "evaporation",
    "potential_evaporation",
    "runoff",
    "snowfall",
    "snowmelt",
    "snow_depth",
    "snow_evaporation",
    "surface_pressure",
    "leaf_area_index_high_vegetation",
    "leaf_area_index_low_vegetation",
    "sub_surface_runoff",
    "surface_runoff",
    "volumetric_soil_water_layer_1",
    "volumetric_soil_water_layer_2",
    "volumetric_soil_water_layer_3",
    "volumetric_soil_water_layer_4",
]

dataset_path = DL_PATH / "inputs/era5-monthly"
# parents=True so the cell also works when the `inputs` folder does not exist yet
dataset_path.mkdir(parents=True, exist_ok=True)

c = cdsapi.Client()
dataset = "reanalysis-era5-single-levels-monthly-means"

# Request fields that are identical for every variable
years = [f"{year}" for year in range(START_YEAR, END_YEAR + 1)]
months = [f"{month:02}" for month in range(1, 12 + 1)]

for variable in VARIABLES:
    print(f"{'=' * 40}")

    dataset_file = dataset_path / f"era5-monthly_{variable}_{START_YEAR}-{END_YEAR}.nc"
    # Make re-runs cheap: keep files from earlier runs. Delete a file to
    # force a fresh download of that variable.
    if dataset_file.exists():
        print(f"Skipping `{variable}` (already downloaded).")
        continue

    print(f"Downloading `{variable}`...")

    request = {
        "product_type": ["monthly_averaged_reanalysis"],
        "variable": [variable],
        "year": years,
        "month": months,
        "time": ["00:00"],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer is never mistaken for a complete file by the check above.
    part_file = dataset_file.with_name(dataset_file.name + ".part")
    c.retrieve(dataset, request, part_file)
    part_file.replace(dataset_file)

print("Download completed.")

2024-12-05 13:23:41,111 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-05 13:23:41,112 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-05 13:23:41,112 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**


Downloading `2m_temperature`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:23:44,627 INFO Request ID is f955ee6e-2e61-48a4-8932-e385d8ab13a2
2024-12-05 13:23:44,822 INFO status has been updated to accepted
2024-12-05 13:24:01,235 INFO status has been updated to successful


458d2625383eebb0496ef65fb8522080.nc:   0%|          | 0.00/1.28G [00:00<?, ?B/s]

Downloading `2m_dewpoint_temperature`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:28:11,819 INFO Request ID is fa7b5091-71b6-45d5-83a1-12852ad95f5d
2024-12-05 13:28:13,183 INFO status has been updated to accepted
2024-12-05 13:28:21,626 INFO status has been updated to running
2024-12-05 13:32:32,196 INFO status has been updated to successful


21a9f27481bfba6b8cb809a0bc2209d8.nc:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Downloading `high_vegetation_cover`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:36:30,194 INFO Request ID is 7b3736d2-42f3-40bc-92d4-c6abe0f5206d
2024-12-05 13:36:30,265 INFO status has been updated to accepted
2024-12-05 13:36:39,033 INFO status has been updated to running
2024-12-05 13:40:56,961 INFO status has been updated to successful


e69ec12e3b8a975846d3d67b474b1db9.nc:   0%|          | 0.00/101M [00:00<?, ?B/s]

Downloading `low_vegetation_cover`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:41:11,974 INFO Request ID is 6ff9fa94-8b25-4d1f-a6ee-3c0f69512d97
2024-12-05 13:41:13,100 INFO status has been updated to accepted
2024-12-05 13:41:22,984 INFO status has been updated to running
2024-12-05 13:44:10,467 INFO status has been updated to successful


5dfcb2cefc574ccc0a31b05e076ace08.nc:   0%|          | 0.00/124M [00:00<?, ?B/s]

Downloading `evaporation`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:44:16,440 INFO Request ID is 142c4273-5441-41c9-abfb-a3e6b0737ca3
2024-12-05 13:44:16,867 INFO status has been updated to accepted
2024-12-05 13:44:31,132 INFO status has been updated to running
2024-12-05 13:48:42,552 INFO status has been updated to successful


ebcc0232bdbac3efa3f0148115d787ae.nc:   0%|          | 0.00/1.61G [00:00<?, ?B/s]

Downloading `potential_evaporation`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:53:10,880 INFO Request ID is be789253-72fb-4556-87d9-be96c4db0838
2024-12-05 13:53:13,543 INFO status has been updated to accepted
2024-12-05 13:53:14,611 INFO status has been updated to running
2024-12-05 13:56:10,491 INFO status has been updated to successful


eb7584125878247d38f0a1d9b72d3dfc.nc:   0%|          | 0.00/657M [00:00<?, ?B/s]

Downloading `runoff`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 13:58:19,907 INFO Request ID is 8a039556-c4d0-40a1-a2b0-51cd290cc8c2
2024-12-05 13:58:22,380 INFO status has been updated to accepted
2024-12-05 13:58:24,352 INFO status has been updated to running
2024-12-05 14:01:28,045 INFO status has been updated to successful


918d8e4f91eb5e76ebfd637d6bf41bac.nc:   0%|          | 0.00/345M [00:00<?, ?B/s]

Downloading `snowfall`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:01:45,970 INFO Request ID is 9e16ddbf-1326-4810-9e8c-d628d80336da
2024-12-05 14:01:46,131 INFO status has been updated to accepted
2024-12-05 14:01:51,580 INFO status has been updated to running
2024-12-05 14:02:00,342 INFO status has been updated to successful


e09251f0605b89b3e79f1303bd516876.nc:   0%|          | 0.00/629M [00:00<?, ?B/s]

Downloading `snowmelt`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:03:13,862 INFO Request ID is 5d80a23c-400a-4193-8cab-f714c481227f
2024-12-05 14:03:14,048 INFO status has been updated to accepted
2024-12-05 14:03:17,946 INFO status has been updated to running
2024-12-05 14:03:20,531 INFO status has been updated to successful


46d4dc2dcbb45ee995f5103cab8b3eef.nc:   0%|          | 0.00/149M [00:00<?, ?B/s]

Downloading `snow_depth`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:03:24,773 INFO Request ID is 03421b05-d581-4223-83c5-19a377deb0b4
2024-12-05 14:03:26,089 INFO status has been updated to accepted
2024-12-05 14:03:33,123 INFO status has been updated to successful


d4ff72291f4010abea99ff04ee92fb0b.nc:   0%|          | 0.00/264M [00:00<?, ?B/s]

Downloading `snow_evaporation`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:04:03,015 INFO Request ID is e5d85463-5354-4d40-80f2-c45ef2f0ca80
2024-12-05 14:04:03,076 INFO status has been updated to accepted
2024-12-05 14:04:08,917 INFO status has been updated to running
2024-12-05 14:04:15,667 INFO status has been updated to successful


b706afee74e5fedba79efaf876a24129.nc:   0%|          | 0.00/430M [00:00<?, ?B/s]

Downloading `surface_pressure`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:04:26,714 INFO Request ID is dd87fccf-86b6-46df-87dc-dc281918a5d7
2024-12-05 14:04:28,536 INFO status has been updated to accepted
2024-12-05 14:04:35,523 INFO status has been updated to running
2024-12-05 14:08:55,565 INFO status has been updated to successful


9296bb89cf477e6d853a80e2197d90b6.nc:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

Downloading `leaf_area_index_high_vegetation`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:12:19,250 INFO Request ID is 28468968-6f95-4929-a54f-bc509edca2a6
2024-12-05 14:12:20,183 INFO status has been updated to accepted
2024-12-05 14:12:25,054 INFO status has been updated to running
2024-12-05 14:15:18,043 INFO status has been updated to successful


ad3af6c8611bccca0680ba5499eec8f2.nc:   0%|          | 0.00/372M [00:00<?, ?B/s]

Downloading `leaf_area_index_low_vegetation`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:16:30,745 INFO Request ID is bf9d8c03-0f81-44dd-a9b8-7e537b3466dd
2024-12-05 14:16:31,642 INFO status has been updated to accepted
2024-12-05 14:16:35,983 INFO status has been updated to running
2024-12-05 14:19:29,638 INFO status has been updated to successful


638856ed3fae44e9cbe2b77fb95d167d.nc:   0%|          | 0.00/426M [00:00<?, ?B/s]

Downloading `sub_surface_runoff`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:20:01,974 INFO Request ID is 2ab664e4-cd73-4602-b9ef-b28447e7d1ac
2024-12-05 14:20:02,112 INFO status has been updated to accepted
2024-12-05 14:20:11,790 INFO status has been updated to running
2024-12-05 14:23:00,232 INFO status has been updated to successful


d0721bc9d9865c1786c47b125f3eb96c.nc:   0%|          | 0.00/319M [00:00<?, ?B/s]

Downloading `surface_runoff`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 14:23:34,377 INFO Request ID is b53ee607-2d5e-4b5f-971d-4f3c7fe5a092
2024-12-05 14:23:37,018 INFO status has been updated to accepted
2024-12-05 14:23:39,623 INFO status has been updated to running
2024-12-05 15:07:06,539 INFO status has been updated to successful


2b2a874d546651d65c564d9d3bd5c605.nc:   0%|          | 0.00/264M [00:00<?, ?B/s]

Downloading `volumetric_soil_water_layer_1`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 15:07:45,560 INFO Request ID is 42faea0a-e1ae-4cfe-9906-edbd5f095830
2024-12-05 15:07:45,998 INFO status has been updated to accepted
2024-12-05 15:07:51,656 INFO status has been updated to running
2024-12-05 15:12:12,922 INFO status has been updated to successful


27491e4d9702e5444c7080f1717371dc.nc:   0%|          | 0.00/748M [00:00<?, ?B/s]

Downloading `volumetric_soil_water_layer_2`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 15:14:27,924 INFO Request ID is c4bf8384-bb1c-4430-bf90-a1fcd1401378
2024-12-05 15:14:27,999 INFO status has been updated to accepted
2024-12-05 15:14:34,267 INFO status has been updated to running
2024-12-05 15:17:23,877 INFO status has been updated to successful


60120282ac754c5b3bc5a80375e22115.nc:   0%|          | 0.00/718M [00:00<?, ?B/s]

Downloading `volumetric_soil_water_layer_3`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 15:20:05,026 INFO Request ID is 1e7e38ba-d6b6-4eb7-ba60-af8fd88063f2
2024-12-05 15:20:05,285 INFO status has been updated to accepted
2024-12-05 15:20:12,406 INFO status has been updated to running
2024-12-05 15:24:29,195 INFO status has been updated to successful


3a098a403b8fb8b23bc852bb2e0ec1cd.nc:   0%|          | 0.00/707M [00:00<?, ?B/s]

Downloading `volumetric_soil_water_layer_4`...


[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-05 15:26:25,132 INFO Request ID is d1e39218-b346-4241-8960-700c1962c1e3
2024-12-05 15:26:25,234 INFO status has been updated to accepted
2024-12-05 15:26:30,230 INFO status has been updated to running
2024-12-05 15:29:17,876 INFO status has been updated to successful


b900c1c6aa64cd4229ed330da3fdfc2c.nc:   0%|          | 0.00/449M [00:00<?, ?B/s]

Download completed.


### NOAA Reconstructed Sea Surface Temperature

Download the sea surface temperatures (SST) for calculating the Oceanic Niño Index (ONI). The ONI climate indices provided by [NOAA](https://psl.noaa.gov/data/climateindices/list/) only go back to 1950.

[Dataset description](https://psl.noaa.gov/data/gridded/data.noaa.ersst.v5.html)

In [2]:
# NOAA ERSST v5 monthly mean SST file -> inputs/noaa-ersst-v5
url = "https://downloads.psl.noaa.gov/Datasets/noaa.ersst.v5/sst.mnmean.nc"
outdir = DL_PATH / "inputs/noaa-ersst-v5"
download_file(url, outdir)

### Glacier mass change

[Dataset description](https://cds.climate.copernicus.eu/datasets/derived-gridded-glacier-mass-change)

In [8]:
# Hydrological years to request. Note that these names are also assigned by
# the ERA5 cell, so the values here replace the ERA5 ones in the kernel.
START_YEAR = 1975
END_YEAR = 2021

dataset_path = DL_PATH / "inputs/wgms-fog"
# parents=True so the cell also works when the `inputs` folder does not exist yet
dataset_path.mkdir(parents=True, exist_ok=True)
dataset_file = dataset_path / "wgms_fog_2023_09.zip"

c = cdsapi.Client()
dataset = "derived-gridded-glacier-mass-change"
request = {
    "variable": "glacier_mass_change",
    # Each entry is "<start year>_<last two digits of the following year>",
    # e.g. 1975 -> "1975_76" and 1999 -> "1999_00".
    "hydrological_year": [
        f"{year}_{(year + 1) % 100:02}" for year in range(START_YEAR, END_YEAR + 1)
    ],
    "product_version": "wgms_fog_2023_09",
}
c.retrieve(dataset, request, dataset_file)
print("Download completed.")

2024-12-05 11:01:41,093 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-05 11:01:41,093 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-05 11:01:41,094 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
2024-12-05 11:01:43,302 INFO [2024-04-29T00:00:00] Version WGMS-FOG-2022-09 will be deprecated in the near future. Users are advised to use the latest version.
2024-12-05 11:01:43,302 INFO Request ID is 58edb837-9e6c-40dd-90f7-651a36ea50a6
2024-12-05 11:01:47,

617ff9b5a8163916054bb6132545eced.zip:   0%|          | 0.00/467M [00:00<?, ?B/s]

Download completed.


In [None]:
# Extract ZIP
with zipfile.ZipFile(dataset_file) as zip_file:
    zip_file.extractall(dataset_path)

# Combine yearly files (sorted so the file order is deterministic)
yearly_files = sorted(dataset_path.glob("*.nc4"))
if not yearly_files:
    # Fail with a message that names the folder instead of a generic error
    raise FileNotFoundError(f"No yearly `*.nc4` files found in {dataset_path}")
with xr.open_mfdataset(yearly_files) as ds:
    ds.to_netcdf(dataset_path / "wgms-fog-2023-09_glacier-mass-change.nc")

# Clean up only after the merged file has been written, so a failed merge
# does not require downloading the archive again.
for file in yearly_files:
    file.unlink()
dataset_file.unlink()

## Human influences

### ISIMIP Land Use

[Data set description](https://www.isimip.org/gettingstarted/input-data-bias-adjustment/details/82/)

In [4]:
# ISIMIP3a land-use files, keyed by the label shown in the progress bar.
urls = {
    "5crops": "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/landuse/histsoc/landuse-5crops_histsoc_annual_1901_2021.nc",
    "15crops": "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/landuse/histsoc/landuse-15crops_histsoc_annual_1901_2021.nc",
    "pastures": "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/landuse/histsoc/landuse-pastures_histsoc_annual_1901_2021.nc",
    "totals": "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/landuse/histsoc/landuse-totals_histsoc_annual_1901_2021.nc",
    "urbanareas": "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/landuse/histsoc/landuse-urbanareas_histsoc_annual_1901_2021.nc",
}
outdir = DL_PATH / "inputs/landuse"

pbar = tqdm(urls.items(), desc="Downloading landuse")
for name, url in pbar:
    # Label the bar with the file currently being fetched
    pbar.set_postfix_str(name)
    download_file(url, outdir)

Downloading landuse: 100%|██████████| 5/5 [00:11<00:00,  2.38s/it, urbanareas]


### ISIMIP Lake area fraction

[Data set description](https://www.isimip.org/gettingstarted/input-data-bias-adjustment/details/132/)

In [3]:
# ISIMIP3a lake area fraction file -> inputs/pctlake
url = "https://files.isimip.org/ISIMIP3a/InputData/socioeconomic/lakes/histsoc/pctlake_histsoc_1901_2021.nc"
outdir = DL_PATH / "inputs/pctlake"
download_file(url, outdir)

## Shapes

### GRDC Major River Basins

Major river basins by the Global Runoff Data Center. The basins incorporate HydroBASINS data and are named. They are available as Shapefile and GeoJSON.

- [Description](https://www.bafg.de/GRDC/EN/02_srvcs/22_gslrs/221_MRB/riverbasins_node.html)
- [Map and download](https://mrb.grdc.bafg.de/)

In [None]:
# GRDC major river basins (Shapefile archive) -> shapefiles/mrb
path = DL_PATH / "shapefiles/mrb"
url = "https://grdc.bafg.de/downloads/GRDC_Major_River_Basins_shp.zip"
download_zip(url, path)

print("Download completed.")

Download completed.


### NaturalEarth 1:50 countries

[Description](https://www.naturalearthdata.com/downloads/50m-cultural-vectors/50m-admin-0-countries-2/)

In [None]:
# NaturalEarth 1:50m admin-0 countries archive -> shapefiles/naturalearth
path = DL_PATH / "shapefiles/naturalearth"
url = "https://naturalearth.s3.amazonaws.com/5.0.1/50m_cultural/ne_50m_admin_0_countries.zip"
download_zip(url, path)

print("Download completed.")

Download completed.


## Previous TWS reconstructions
### Humphrey, 2019

[Data on figshare](https://figshare.com/articles/dataset/GRACE-REC_A_reconstruction_of_climate-driven_water_storage_changes_over_the_last_century/7670849)

In [16]:
# Humphrey (2019) reconstruction archive from figshare -> reconstructions/humphrey
path = DL_PATH / "reconstructions/humphrey"
path.mkdir(exist_ok=True, parents=True)  # create the target folder up front
url = "https://figshare.com/ndownloader/files/17990285"
download_zip(url, path)

print("Download completed.")

Download completed.


### Li, 2021

[Download here on DRYAD](https://datadryad.org/stash/dataset/doi:10.5061/dryad.z612jm6bt)

### Chandanpurkar, 2022

[Data on zenodo](https://zenodo.org/records/6659543)

In [15]:
# Chandanpurkar (2022) reconstruction file from Zenodo -> reconstructions/chandanpurkar
path = DL_PATH / "reconstructions/chandanpurkar"
url = "https://zenodo.org/records/6659543/files/tws_cseof_v1.nc"
download_file(url, path)

print("Download completed.")


Download completed.


### Yin, 2023

[Data on zenodo](https://zenodo.org/records/10040927)

In [None]:
# Yin (2023) reconstruction files from Zenodo: the CSR-, GSFC- and JPL-based
# variants, all stored in the same folder.
urls = [
    "https://zenodo.org/records/10040927/files/CSR-based%20GTWS-MLrec%20TWS.nc",
    "https://zenodo.org/records/10040927/files/GSFC-based%20GTWS-MLrec%20TWS.nc",
    "https://zenodo.org/records/10040927/files/JPL-based%20GTWS-MLrec%20TWS.nc",
]
path = DL_PATH / "reconstructions/yin"

for url in tqdm(urls):
    download_file(url, path)


### Palazzoli, 2025

[Data on zenodo](https://zenodo.org/records/10953658)

In [None]:
# Palazzoli (2025) reconstruction file from Zenodo -> reconstructions/palazzoli
path = DL_PATH / "reconstructions/palazzoli"
url = "https://zenodo.org/records/10953658/files/GRAiCE_BiLSTM.nc"
download_file(url, path)

print("Download completed.")
