# This notebook downloads NEX-GDDP-CMIP6 data
##### Author: Omid Emamjomehzadeh (https://www.omidemam.com/)
##### Supervisor: Dr. Omar Wani (https://engineering.nyu.edu/faculty/omar-wani)
##### Hydrologic Systems Group @NYU (https://www.omarwani.com/)

In [2]:
#import libraries
import os
import time
import xarray as xr
import pandas as pd
import fsspec
from pystac_client import Client
import planetary_computer
import datetime

In [None]:

# --- Configuration ---------------------------------------------------------
# Directory that receives the per-day NetCDF files. It defaults to the original
# location and can be redirected without editing the notebook by setting the
# NEX_GDDP_CMIP6_DIR environment variable before starting the kernel.
output_dir = os.environ.get("NEX_GDDP_CMIP6_DIR", r"D:\NEX-GDDP-CMIP6")
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# STAC Catalog connection with retry
# Documented production endpoint of the Planetary Computer STAC API. The
# notebook previously pointed at the "planetarycomputer-test" host, and the
# recorded run shows it answering "Internal Server Error" twice before a
# request succeeded.
catalog_url = "https://planetarycomputer.microsoft.com/api/stac/v1"
max_retries = 10  # maximum number of connection/search attempts
delay = 3  # seconds to wait between failed attempts

# Open the STAC catalog and run the search, retrying on failure.
# Both steps sit inside the retry loop because either request can fail
# (the recorded run shows "Internal Server Error" on the first attempts).
# On success the loop leaves `catalog`, `search` and `items` defined.
last_error = None  # most recent failure, chained into the final error below
for attempt in range(1, max_retries + 1):
    try:
        catalog = Client.open(
            catalog_url,
            modifier=planetary_computer.sign_inplace,
        )
        # Search for GFDL-ESM4 historical data
        search = catalog.search(
            collections=["nasa-nex-gddp-cmip6"],
            datetime="1979/2014",
            query={
                "cmip6:model": {"eq": "GFDL-ESM4"},
                "cmip6:scenario": {"eq": "historical"},
            },
        )

        # Fetch the matching items now so that request failures are retried too.
        items = search.item_collection()
    except Exception as e:
        last_error = e
        print(f"⚠️ Attempt {attempt} failed: {e}")
        # Waiting after the final attempt would only delay the error report.
        if attempt < max_retries:
            time.sleep(delay)
    else:
        print("✅ Successfully connected to the STAC catalog.")
        break
else:
    # Every attempt failed: keep the underlying cause attached to the traceback.
    raise RuntimeError(
        f"❌ Failed to connect after {max_retries} attempts."
    ) from last_error

# An empty result would otherwise only surface later as an IndexError.
if len(items) == 0:
    raise RuntimeError("The STAC search succeeded but returned no items.")



# Download one STAC item: for each variable, cut out the New York State
# bounding box and write one compressed NetCDF file per time step.

# Only a single item is processed per run; change item_index to fetch another.
# In the recorded run, index 10 produced files dated 2004.
item_index = 10
if item_index >= len(items):
    raise IndexError(
        f"item_index {item_index} is out of range: the search returned {len(items)} item(s)."
    )
item = items[item_index]

# NYS bounding box (loop-invariant, so defined once up front)
lat_min, lat_max = 40, 45.25
lon_min, lon_max = -79.75, -72

for var in ['pr', 'tas', 'hurs', 'huss', 'rlds', 'rsds', 'tasmax', 'tasmin', 'sfcWind']:
    try:
        # Context managers close the remote file handle and the dataset even
        # when processing fails part-way through a variable.
        with fsspec.open(item.assets[var].href) as remote_file:
            with xr.open_dataset(remote_file) as ds_raw:
                ds = ds_raw

                # Convert longitude from 0..360 to -180..180 if needed, so the
                # negative bounding-box longitudes below can match.
                if ds.lon.max() > 180:
                    ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
                    ds = ds.sortby('lon')

                # Subset
                ds_nys = ds[var].sel(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max))
                if ds_nys.sizes["lat"] == 0 or ds_nys.sizes["lon"] == 0:
                    # Label slices return nothing when the coordinate order does
                    # not match the slice direction; fail loudly instead of
                    # writing empty files.
                    raise ValueError("bounding box selected no grid cells")

                # Format every time stamp once. The .dt accessor handles both
                # cftime and numpy datetime64 time axes.
                time_labels = ds.time.dt.strftime("%Y-%m-%d").values

                # Loop through time
                for i, time_val in enumerate(time_labels):
                    output_path = os.path.join(output_dir, f"{var}_{time_val}.nc")

                    # Skip if file exists
                    if os.path.exists(output_path):
                        print(f"⏩ Skipping (already exists): {output_path}")
                        continue

                    daily_slice = ds_nys.isel(time=i).expand_dims("time")

                    # Save compressed NetCDF. Write to a temporary name first and
                    # rename on success, so an interrupted write can never leave a
                    # truncated file that the existence check above would skip.
                    # A leftover ".part" file is simply overwritten on the next run.
                    partial_path = output_path + ".part"
                    daily_slice.to_netcdf(
                        partial_path,
                        engine="netcdf4",
                        encoding={var: {"zlib": True, "complevel": 6, "shuffle": True}}
                    )
                    os.replace(partial_path, output_path)
                    print(f"✅ Saved: {output_path}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next variable.
        print(f"⚠️ Failed to process {var} in {item.id}: {e}")


⚠️ Attempt 1 failed: Internal Server Error
⚠️ Attempt 2 failed: Internal Server Error
✅ Successfully connected to the STAC catalog.
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-01.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-02.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-03.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-04.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-05.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-06.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-07.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-08.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-09.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-10.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-11.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-12.nc
⏩ Skipping (already exists): D:\NEX-GDDP-CMIP6\pr_2004-01-13.nc
⏩ Skipping (already exists): D:\NEX-

In [11]:
%load_ext watermark
# Print the Python version and some dependencies
%watermark -v -m -p xarray,pandas,fsspec,pystac_client,planetary_computer,datetime

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.12.4
IPython version      : 8.20.0

xarray            : 2024.7.0
pandas            : 2.2.2
fsspec            : 2025.3.0
pystac_client     : 0.8.3
planetary_computer: 1.0.0
datetime          : unknown

Compiler    : MSC v.1940 64 bit (AMD64)
OS          : Windows
Release     : 11
Machine     : AMD64
Processor   : Intel64 Family 6 Model 183 Stepping 1, GenuineIntel
CPU cores   : 24
Architecture: 64bit

