### Purpose:
 - Download retrospective hourly analysis & assimilation NWM files from the Google Cloud archive
 - Run the cells in order to import libraries, select parameters, define functions, and download files
    
### Inputs:
 - (1) begin date for data retrieval
 - (2) end date for data retrieval (inclusive)
 - (3) desired product (channel routing or forcing)
 - (4) directory for downloaded files
 - (5) maximum number of download attempts per file

In [None]:
# import python libraries

import os
import pandas as pd
import requests
import sys

from time import sleep
from tqdm.auto import trange

In [None]:
# select input parameters

begindate = '20210918' # first date to retrieve (YYYYMMDD); data is avail. beginning 20180917
enddate = '20210919' # last date to retrieve (YYYYMMDD), inclusive
product = 'channel_rt' # 'channel_rt' or 'forcing'
destfolder = r'C:\Users\mkw2538\Coding\Notebooks\test' # download directory; it will be created if it does not already exist. use a raw string for Windows paths
max_requests = 30 # max. number of download attempts per file before giving up

In [None]:
# define functions

def _download_file(url, write_file, max_attempts):
    """Download ``url`` to ``write_file``, retrying up to ``max_attempts`` times.

    Any failure (connection error, timeout, HTTP error status, or a problem
    writing the file) triggers a retry after a short pause. Once the last
    attempt has failed, an ``Exception`` is raised whose message contains the
    text of the final underlying error.
    """
    for attempt in range(max_attempts):
        try:
            # A timeout keeps a stalled connection from hanging the notebook;
            # a timed-out request is retried like any other failure.
            r = requests.get(url, timeout=60)
            # Without this check an HTTP error page (e.g. a 404 for a missing
            # hour) would be written to disk as if it were a valid NetCDF file.
            r.raise_for_status()
            with open(write_file, 'wb') as f:
                f.write(r.content)
            return
        except Exception as ex:
            if attempt != max_attempts - 1:
                sleep(0.5)  # Give NOAA time to wake up
            else:
                m = 'Could not download file.\n' + str(ex)
                raise Exception(m) from ex


def get_netcdf(filetype,begin_date,end_date,output_folder,max_attempts):
    """Download hourly NWM analysis & assimilation files for a range of dates.

    For every day from ``begin_date`` through ``end_date`` (inclusive) and
    every hour 00-23, the ``tm00`` CONUS file of the requested product is
    fetched from the ``national-water-model`` bucket on
    storage.googleapis.com and saved to ``output_folder/<YYYYMMDD>/<filename>``.

    Parameters
    ----------
    filetype : str
        Product to download: ``'channel_rt'`` or ``'forcing'``.
    begin_date, end_date : str or datetime-like
        First and last day to download; anything ``pandas.to_datetime``
        accepts (e.g. ``'20210918'``).
    output_folder : str
        Destination directory. It, any missing parent directories, and one
        sub-directory per day are created as needed.
    max_attempts : int
        Maximum number of download attempts per file.

    Raises
    ------
    SystemExit
        If ``filetype`` is not one of the supported products. This is checked
        before any directory is created.
    Exception
        If a file still cannot be downloaded after ``max_attempts`` attempts.
    """
    # Validate the product first so a typo does not leave empty folders behind.
    if filetype == 'channel_rt':
        prodstr = ''
    elif filetype == 'forcing':
        prodstr = 'forcing_'
    else:
        print("Product error. Choose 'channel_rt' or 'forcing'.")
        sys.exit()

    # makedirs also creates missing parent directories and tolerates an
    # already-existing folder.
    os.makedirs(output_folder, exist_ok=True)

    dates = pd.date_range(pd.to_datetime(begin_date), pd.to_datetime(end_date))

    # use trange to print status bar
    for i in trange(len(dates), desc='Total'):
        save_dir = dates[i].strftime('%Y%m%d')
        day_folder = os.path.join(output_folder, save_dir)
        os.makedirs(day_folder, exist_ok=True)

        for hr in trange(24, desc=save_dir, leave=False):

            url = f'https://storage.googleapis.com/national-water-model/' \
                  f'nwm.{save_dir}/{prodstr}analysis_assim/' \
                  f'nwm.t{hr:02d}z.analysis_assim.{filetype}.tm00.conus.nc'

            filename = os.path.basename(url)
            write_file = os.path.join(day_folder, filename)

            _download_file(url, write_file, max_attempts)

In [None]:
# run the download using the input parameters selected earlier in the notebook

get_netcdf(
    filetype=product,
    begin_date=begindate,
    end_date=enddate,
    output_folder=destfolder,
    max_attempts=max_requests,
)