This notebook downloads National Water Model (NWM) short-range data from the Google Cloud Storage archive. Originally compiled by: Mark Wang, passaH2O group

In [None]:
# import python libraries

import matplotlib.pyplot as plt
import os
import pandas as pd
import requests
import sys
import xarray as xr
import re
import numpy as np

from matplotlib.dates import DateFormatter
from time import sleep

# ---- user-selected input parameters ----

# First day to download, formatted YYYYMMDD (data is avail. beginning 20180917).
begindate = '20201103'
# Last day to download, formatted YYYYMMDD (inclusive).
enddate = '20201104'
# Product to fetch: 'channel_rt' or 'forcing'.
product = 'channel_rt'
# Destination directory; it will be created in the current working
# directory if it does not already exist.
destfolder_name = 'november2020'


In [2]:
# Define functions


def _download_file(url, write_file, max_attempts, retry_wait, timeout):
    """Save the response body of *url* to *write_file*, retrying on failure.

    Parameters
    ----------
    url : str
        Address to fetch with ``requests.get``.
    write_file : str
        Path the response body is written to (binary mode).
    max_attempts : int
        Total number of tries before giving up.
    retry_wait : float
        Seconds to sleep between failed tries.
    timeout : float or None
        Timeout forwarded to ``requests.get``.

    Raises
    ------
    Exception
        If the last attempt fails; the message starts with
        ``'Could not download file.'`` and the original error is chained.
    """
    for attempt in range(max_attempts):
        try:
            # NOTE: the HTTP status is not checked, so an error body (e.g. for
            # an hour that is absent from the archive) is saved as-is.
            # get_series maps files that cannot be read as NetCDF to NaN.
            r = requests.get(url, timeout=timeout)
            with open(write_file, 'wb') as f:
                f.write(r.content)
            return
        except Exception as ex:
            if attempt == max_attempts - 1:
                m = 'Could not download file.\n' + str(ex)
                raise Exception(m) from ex
            sleep(retry_wait)  # Give NOAA time to wake up


def get_netcdf(filetype, begin_date, end_date, output_folder_name,
               max_attempts=30, retry_wait=0.5, timeout=60):
    """Download the f001 short-range NWM file of every hour in a date range.

    For each day between *begin_date* and *end_date* (both inclusive) a
    sub-folder named ``YYYYMMDD`` is created inside the output folder and the
    24 hourly ``nwm.tHHz.short_range.<filetype>.f001.conus.nc`` files are
    saved into it.

    Parameters
    ----------
    filetype : str
        ``'channel_rt'`` or ``'forcing'``.
    begin_date, end_date : str or datetime-like
        First and last day to download; anything ``pd.to_datetime`` accepts.
    output_folder_name : str
        Destination folder, relative to the current working directory.
        It is created (including parents) if it does not exist.
    max_attempts : int, optional
        Number of tries per file before giving up (default 30).
    retry_wait : float, optional
        Seconds to wait between tries (default 0.5).
    timeout : float or None, optional
        Timeout in seconds passed to ``requests.get`` (default 60).

    Raises
    ------
    SystemExit
        If *filetype* is not one of the supported products.
    ValueError
        If *max_attempts* is smaller than 1.
    Exception
        If a file could not be downloaded after *max_attempts* tries.
    """
    # Validate the arguments before touching the file system.
    if filetype == 'channel_rt':
        prodstr = ''
    elif filetype == 'forcing':
        prodstr = 'forcing_'
    else:
        print("Product error. Choose 'channel_rt' or 'forcing'.")
        sys.exit()

    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    output_folder = os.path.join(os.getcwd(), output_folder_name)
    os.makedirs(output_folder, exist_ok=True)

    dates = pd.date_range(pd.to_datetime(begin_date), pd.to_datetime(end_date))

    for date in dates:
        save_dir = date.strftime('%Y%m%d')
        day_folder = os.path.join(output_folder, save_dir)
        os.makedirs(day_folder, exist_ok=True)

        for hr in range(24):
            url = f'https://storage.googleapis.com/national-water-model/' \
                  f'nwm.{save_dir}/{prodstr}short_range/' \
                  f'nwm.t{str(hr).zfill(2)}z.short_range.{filetype}.f001.conus.nc'
            write_file = os.path.join(day_folder, os.path.basename(url))
            _download_file(url, write_file, max_attempts, retry_wait, timeout)

        print(f'{save_dir} done')
       
        
def get_series(comid, begin_date, end_date, datafolder_name, filetype):
    """Build an hourly series of ``streamflow`` for one feature from local files.

    For every day between *begin_date* and *end_date* (both inclusive) and
    every hour 0-23, the file
    ``<datafolder_name>/<YYYYMMDD>/nwm.tHHz.short_range.<filetype>.f001.conus.nc``
    is opened and the ``streamflow`` value at ``feature_id == comid`` is read.

    Parameters
    ----------
    comid : int
        Value looked up along the ``feature_id`` coordinate.
    begin_date, end_date : str or datetime-like
        First and last day to read; anything ``pd.to_datetime`` accepts.
    datafolder_name : str
        Folder holding the per-day sub-folders, relative to the current
        working directory.
    filetype : str
        Product name embedded in the file names (e.g. ``'channel_rt'``).

    Returns
    -------
    pandas.DataFrame
        One row per hour, indexed by timestamp, with a single float column
        ``'Q'``. Hours whose file is not a readable NetCDF file hold NaN.
        An empty date range yields an empty frame that still has the ``'Q'``
        column.

    Raises
    ------
    OSError
        Any error opening a file other than "NetCDF: Unknown file format"
        (for example a file that does not exist).
    """
    data_folder = os.path.join(os.getcwd(), datafolder_name)
    dates = pd.date_range(pd.to_datetime(begin_date), pd.to_datetime(end_date))

    # Collect rows in plain lists and build the frame once at the end rather
    # than enlarging a DataFrame row by row.
    timestamps = []
    flows = []

    for date in dates:
        date_dir = date.strftime('%Y%m%d')

        for hr in range(24):
            filename = f'nwm.t{str(hr).zfill(2)}z.short_range.{filetype}.f001.conus.nc'
            nc_file = os.path.join(data_folder, date_dir, filename)
            try:
                # The context manager closes the file handle after each read.
                with xr.open_dataset(nc_file, engine="netcdf4") as data:
                    Q = float(data.sel(feature_id=comid).streamflow.values)
            except OSError as e:
                # A file that is not valid NetCDF is treated as a missing
                # value; every other OS-level error is propagated unchanged.
                if 'NetCDF: Unknown file format:' not in str(e):
                    raise
                Q = np.nan
            timestamps.append(pd.to_datetime(f'{date_dir} {hr}:00'))
            flows.append(Q)

    return pd.DataFrame({'Q': flows}, index=pd.DatetimeIndex(timestamps),
                        dtype='float64')
    


In [None]:
# Download every hourly file for the selected product and date range.
get_netcdf(
    filetype=product,
    begin_date=begindate,
    end_date=enddate,
    output_folder_name=destfolder_name,
)