# 2a processing
- This notebook downloads and merges NLDAS data for the growing degree day and extreme degree day calculations.

## downloading data

### Packages

In [89]:
import datetime
import logging
import os
import re
import warnings

import dask
import numpy as np
import requests
import xarray

### Functions

In [86]:
def downloadData(url, output_path, timeout=(30, 300)):
    '''
    Download one file from NASA Earthdata and write it to disk.

    The request is made with the notebook-level ``session``, ``username`` and
    ``password`` globals.

    inputs:
      url: a string representing the file URL.
      output_path: where the file should be saved. Either the full path of the
        file to write, or an existing directory; for a directory the file name
        is taken from the Content-Disposition header, falling back to the last
        segment of the URL.
      timeout: (connect, read) timeout in seconds handed to ``session.get`` so
        a stalled connection cannot block forever.

    returns: nothing. A non-200 response is logged as a warning and no file
      is written.

    raises:
      ValueError: output_path is a directory and no file name can be derived.
      Any exception raised by the HTTP request or by writing the file.
    '''
    response = session.get(url, auth=(username, password), stream=True, timeout=timeout)
    try:
        if response.status_code != 200:
            logging.getLogger(__name__).warning(
                "Skipping %s: HTTP status %s", url, response.status_code
            )
            return

        target_path = output_path
        if os.path.isdir(output_path):
            # Prefer the server-provided name, otherwise use the URL's last segment
            filename = url.split("/")[-1]
            cd = response.headers.get("content-disposition")
            if cd:
                fname_match = re.findall('filename="?([^";]+)"?', cd)
                if fname_match:
                    filename = fname_match[0]
            # The header is server-supplied: keep only the final path component
            # so the file cannot land outside output_path.
            filename = os.path.basename(filename)
            if not filename:
                raise ValueError(f"Cannot derive a file name for {url}")
            target_path = os.path.join(output_path, filename)

        # Stream into a sibling ".part" file and rename at the end, so an
        # interrupted transfer never leaves a truncated file at target_path.
        partial_path = f"{target_path}.part"
        try:
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # filter out keep-alive chunks
                        f.write(chunk)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        os.replace(partial_path, target_path)
    finally:
        # stream=True keeps the connection checked out until it is released
        response.close()
        
def singleYearUrl(year):
    '''
    Download every hourly NLDAS_FORA0125_H file for one calendar year.

    Walks hour by hour from 00:00 on 1 January to 23:00 on 31 December, builds
    the file URL under the notebook-level ``base_url`` and passes it to
    ``downloadData``. Files are saved in ``{output_dir}/{year}``, which is
    created (including missing parents) if it does not exist.

    inputs:
      year: the calendar year to download (anything ``int()`` accepts).

    returns: nothing
    '''
    year = int(year)
    tmp_downloads_dir = f"{output_dir}/{year}"

    # exist_ok keeps re-runs working, while real failures (permissions, a bad
    # path) are raised here instead of surfacing later on every file write.
    os.makedirs(tmp_downloads_dir, exist_ok=True)

    current_dt = datetime.datetime(year, 1, 1, 0, 0)
    end_dt = datetime.datetime(year, 12, 31, 23, 0)
    one_hour = datetime.timedelta(hours=1)

    while current_dt <= end_dt:
        # The remote tree is organised as <year>/<day-of-year>/
        julian_day = current_dt.strftime("%j")
        # Date and HHMM stamp as used in the file name, e.g. "20010123.1100"
        stamp = current_dt.strftime("%Y%m%d.%H%M")
        filename = f"NLDAS_FORA0125_H.A{stamp}.020.nc"

        downloadData(
            f"{base_url}/{year}/{julian_day}/{filename}",
            f"{tmp_downloads_dir}/{filename}",
        )
        current_dt += one_hour

### Inputs

In [87]:
# inputs
# Root folder for the raw downloads; one sub-folder per year is created below it.
output_dir = "/storage/home/cta5244/work/pyWBM_yield_data/NCEPNARR_NLDAS_Hist_Temp/tmp_downloads"
# Base URL of the hourly NLDAS_FORA0125_H v2.0 files.
base_url = "https://hydro1.gesdisc.eosdis.nasa.gov/data/NLDAS/NLDAS_FORA0125_H.2.0"
start_year = 1979
end_year = 2026  # used as the exclusive stop of np.arange in the compute cell

# Earthdata credentials are read from the environment, never hard-coded.
username = os.environ.get("earthnasa_user")
password = os.environ.get("earthnasa_pass")

# os.environ.get returns None for an unset variable, which would otherwise be
# sent as credentials on every request without any hint of what is wrong.
_missing_credentials = [
    env_name
    for env_name, env_value in (("earthnasa_user", username), ("earthnasa_pass", password))
    if not env_value
]
if _missing_credentials:
    warnings.warn(
        "Earthdata credentials are not set in the environment: "
        + ", ".join(_missing_credentials)
    )

session = requests.Session()

### dask implementation

In [92]:
from dask_jobqueue import SLURMCluster

# Resources requested for each SLURM job.
# (alternative allocation: account="pches")
slurm_job_options = dict(
    account="open",
    cores=1,
    memory="20GiB",
    walltime="03:00:00",
)
cluster = SLURMCluster(**slurm_job_options)

# Request 30 jobs with the spec above.
cluster.scale(jobs=30)

2025-02-21 17:55:19,723 - distributed.core - INFO - Connection to tcp://10.6.8.47:51452 has been closed.
2025-02-21 17:55:19,724 - distributed.scheduler - INFO - Remove worker addr: tcp://10.6.8.47:41693 name: SLURMCluster-12 (stimulus_id='handle-worker-cleanup-1740178519.7241838')
2025-02-21 17:55:19,727 - distributed.core - INFO - Connection to tcp://10.6.8.47:51438 has been closed.
2025-02-21 17:55:19,727 - distributed.scheduler - INFO - Remove worker addr: tcp://10.6.8.47:34745 name: SLURMCluster-23 (stimulus_id='handle-worker-cleanup-1740178519.7278957')
2025-02-21 17:55:19,728 - distributed.batched - INFO - Batched Comm Closed <TCP (closed) Scheduler connection to worker local=tcp://146.186.150.13:46555 remote=tcp://10.6.8.47:51438>
Traceback (most recent call last):
  File "/storage/home/cta5244/mambaforge/envs/pyWBM/lib/python3.10/site-packages/distributed/comm/tcp.py", line 298, in write
    raise StreamClosedError()
tornado.iostream.StreamClosedError: Stream is closed

The ab

In [99]:
from dask.distributed import Client

# Attach a distributed client to the SLURM cluster created in the cell above.
client = Client(cluster)



+---------+----------+-----------+----------+
| Package | Client   | Scheduler | Workers  |
+---------+----------+-----------+----------+
| dask    | 2025.1.0 | 2025.1.0  | 2025.2.0 |
+---------+----------+-----------+----------+


In [101]:
# Display the client summary (dashboard address, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://146.186.150.13:8787/status,

0,1
Dashboard: http://146.186.150.13:8787/status,Workers: 30
Total threads: 30,Total memory: 600.00 GiB

0,1
Comm: tcp://146.186.150.13:46555,Workers: 30
Dashboard: http://146.186.150.13:8787/status,Total threads: 30
Started: Just now,Total memory: 600.00 GiB

0,1
Comm: tcp://10.6.8.51:39437,Total threads: 1
Dashboard: http://10.6.8.51:46635/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.51:40679,
Local directory: /tmp/dask-scratch-space/worker-_b68ouy5,Local directory: /tmp/dask-scratch-space/worker-_b68ouy5

0,1
Comm: tcp://10.6.8.48:36327,Total threads: 1
Dashboard: http://10.6.8.48:35879/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.48:42681,
Local directory: /tmp/dask-scratch-space/worker-d5mvjyku,Local directory: /tmp/dask-scratch-space/worker-d5mvjyku

0,1
Comm: tcp://10.6.8.51:44771,Total threads: 1
Dashboard: http://10.6.8.51:42381/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.51:34219,
Local directory: /tmp/dask-scratch-space/worker-3xy4pgr7,Local directory: /tmp/dask-scratch-space/worker-3xy4pgr7

0,1
Comm: tcp://10.6.8.77:34497,Total threads: 1
Dashboard: http://10.6.8.77:40635/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.77:45033,
Local directory: /tmp/dask-scratch-space/worker-4b2mazfo,Local directory: /tmp/dask-scratch-space/worker-4b2mazfo

0,1
Comm: tcp://10.6.8.47:41693,Total threads: 1
Dashboard: http://10.6.8.47:41749/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.47:40891,
Local directory: /tmp/dask-scratch-space/worker-7c56d1xr,Local directory: /tmp/dask-scratch-space/worker-7c56d1xr

0,1
Comm: tcp://10.6.8.68:38081,Total threads: 1
Dashboard: http://10.6.8.68:45741/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.68:43423,
Local directory: /tmp/dask-scratch-space/worker-mmzq39p1,Local directory: /tmp/dask-scratch-space/worker-mmzq39p1

0,1
Comm: tcp://10.6.8.67:36355,Total threads: 1
Dashboard: http://10.6.8.67:44151/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.67:39405,
Local directory: /tmp/dask-scratch-space/worker-j36btij9,Local directory: /tmp/dask-scratch-space/worker-j36btij9

0,1
Comm: tcp://10.6.8.66:39139,Total threads: 1
Dashboard: http://10.6.8.66:42181/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.66:45677,
Local directory: /tmp/dask-scratch-space/worker-5az45_28,Local directory: /tmp/dask-scratch-space/worker-5az45_28

0,1
Comm: tcp://10.6.8.73:36167,Total threads: 1
Dashboard: http://10.6.8.73:34295/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.73:33517,
Local directory: /tmp/dask-scratch-space/worker-902jfkcl,Local directory: /tmp/dask-scratch-space/worker-902jfkcl

0,1
Comm: tcp://10.6.8.57:36611,Total threads: 1
Dashboard: http://10.6.8.57:35989/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.57:33277,
Local directory: /tmp/dask-scratch-space/worker-yv7g6_0y,Local directory: /tmp/dask-scratch-space/worker-yv7g6_0y

0,1
Comm: tcp://10.6.8.62:37403,Total threads: 1
Dashboard: http://10.6.8.62:35853/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.62:36353,
Local directory: /tmp/dask-scratch-space/worker-9toia15l,Local directory: /tmp/dask-scratch-space/worker-9toia15l

0,1
Comm: tcp://10.6.8.58:35551,Total threads: 1
Dashboard: http://10.6.8.58:41915/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.58:37435,
Local directory: /tmp/dask-scratch-space/worker-l0kcdsk6,Local directory: /tmp/dask-scratch-space/worker-l0kcdsk6

0,1
Comm: tcp://10.6.8.73:41749,Total threads: 1
Dashboard: http://10.6.8.73:36615/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.73:46785,
Local directory: /tmp/dask-scratch-space/worker-p17ovzzf,Local directory: /tmp/dask-scratch-space/worker-p17ovzzf

0,1
Comm: tcp://10.6.8.71:41737,Total threads: 1
Dashboard: http://10.6.8.71:41003/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.71:33845,
Local directory: /tmp/dask-scratch-space/worker-g2_2ndi2,Local directory: /tmp/dask-scratch-space/worker-g2_2ndi2

0,1
Comm: tcp://10.6.8.54:42687,Total threads: 1
Dashboard: http://10.6.8.54:43535/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.54:39477,
Local directory: /tmp/dask-scratch-space/worker-z3av70c0,Local directory: /tmp/dask-scratch-space/worker-z3av70c0

0,1
Comm: tcp://10.6.8.69:46543,Total threads: 1
Dashboard: http://10.6.8.69:33753/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.69:46747,
Local directory: /tmp/dask-scratch-space/worker-gl04qmwm,Local directory: /tmp/dask-scratch-space/worker-gl04qmwm

0,1
Comm: tcp://10.6.8.47:34745,Total threads: 1
Dashboard: http://10.6.8.47:43183/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.47:46693,
Local directory: /tmp/dask-scratch-space/worker-ub3q1l6t,Local directory: /tmp/dask-scratch-space/worker-ub3q1l6t

0,1
Comm: tcp://10.6.8.54:43547,Total threads: 1
Dashboard: http://10.6.8.54:35125/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.54:42877,
Local directory: /tmp/dask-scratch-space/worker-lpi1ir6j,Local directory: /tmp/dask-scratch-space/worker-lpi1ir6j

0,1
Comm: tcp://10.6.8.79:44961,Total threads: 1
Dashboard: http://10.6.8.79:46415/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.79:43025,
Local directory: /tmp/dask-scratch-space/worker-caz75wxm,Local directory: /tmp/dask-scratch-space/worker-caz75wxm

0,1
Comm: tcp://10.6.8.59:36553,Total threads: 1
Dashboard: http://10.6.8.59:42735/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.59:37465,
Local directory: /tmp/dask-scratch-space/worker-b9mpbh7a,Local directory: /tmp/dask-scratch-space/worker-b9mpbh7a

0,1
Comm: tcp://10.6.8.70:39407,Total threads: 1
Dashboard: http://10.6.8.70:44073/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.70:40721,
Local directory: /tmp/dask-scratch-space/worker-wh6regdk,Local directory: /tmp/dask-scratch-space/worker-wh6regdk

0,1
Comm: tcp://10.6.8.55:44923,Total threads: 1
Dashboard: http://10.6.8.55:39109/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.55:40659,
Local directory: /tmp/dask-scratch-space/worker-fh579n4n,Local directory: /tmp/dask-scratch-space/worker-fh579n4n

0,1
Comm: tcp://10.6.8.53:45503,Total threads: 1
Dashboard: http://10.6.8.53:36015/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.53:33309,
Local directory: /tmp/dask-scratch-space/worker-jsmqv3dh,Local directory: /tmp/dask-scratch-space/worker-jsmqv3dh

0,1
Comm: tcp://10.6.8.50:33481,Total threads: 1
Dashboard: http://10.6.8.50:38753/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.50:33655,
Local directory: /tmp/dask-scratch-space/worker-a7hw76to,Local directory: /tmp/dask-scratch-space/worker-a7hw76to

0,1
Comm: tcp://10.6.8.56:44467,Total threads: 1
Dashboard: http://10.6.8.56:43909/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.56:37479,
Local directory: /tmp/dask-scratch-space/worker-jpx1w5ag,Local directory: /tmp/dask-scratch-space/worker-jpx1w5ag

0,1
Comm: tcp://10.6.8.68:33513,Total threads: 1
Dashboard: http://10.6.8.68:45547/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.68:46335,
Local directory: /tmp/dask-scratch-space/worker-uk64nbus,Local directory: /tmp/dask-scratch-space/worker-uk64nbus

0,1
Comm: tcp://10.6.8.65:45765,Total threads: 1
Dashboard: http://10.6.8.65:40061/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.65:33705,
Local directory: /tmp/dask-scratch-space/worker-nos1no0b,Local directory: /tmp/dask-scratch-space/worker-nos1no0b

0,1
Comm: tcp://10.6.8.65:40667,Total threads: 1
Dashboard: http://10.6.8.65:46429/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.65:43415,
Local directory: /tmp/dask-scratch-space/worker-8c7eshn7,Local directory: /tmp/dask-scratch-space/worker-8c7eshn7

0,1
Comm: tcp://10.6.8.77:43423,Total threads: 1
Dashboard: http://10.6.8.77:43059/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.77:35513,
Local directory: /tmp/dask-scratch-space/worker-5mdzwi60,Local directory: /tmp/dask-scratch-space/worker-5mdzwi60

0,1
Comm: tcp://10.6.8.67:41813,Total threads: 1
Dashboard: http://10.6.8.67:42117/status,Memory: 20.00 GiB
Nanny: tcp://10.6.8.67:41115,
Local directory: /tmp/dask-scratch-space/worker-9gfd48fp,Local directory: /tmp/dask-scratch-space/worker-9gfd48fp


In [110]:
# Build one delayed singleYearUrl call per year in [start_year, end_year).
results = [
    dask.delayed(singleYearUrl)(year=year)
    for year in np.arange(start_year, end_year, 1)
]

# Run all yearly tasks; this is where the downloads actually happen.
results = dask.compute(*results)

SSLError: None: Max retries exceeded with url: /data/NLDAS/NLDAS_FORA0125_H.2.0/2001/023/NLDAS_FORA0125_H.A20010123.1100.020.nc (Caused by None)