# Notebook that converts per-region netCDF files to Zarr stores so they can be indexed more efficiently

### Uses the `pangeo_small` environment

In [1]:
# Load (or reload) the autoreload extension and set mode 2, so imported
# modules are re-imported automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import xarray as xr
import zarr
from dask.distributed import Client
from joblib import Parallel, delayed
import pandas as pd
import os

## Set the parameters here
### Ensure every state and region you want to transform is specified here.

In [8]:
# Root directory that the numbered input/output folders are built from.
# The commented-out line is an alternative location kept for convenience.
#data_root = '/media/scottcha/E1/Data/OAPMLData/'
data_root = '/media/scottcha/E1/Data/Temp/Tutorial/'
# Interpolation factor; in this notebook it is only used to build the
# names of the input and output directories.
interpolation = 1

# Only the Washington regions and a single season are enabled for the tutorial.
# Uncomment additional regions and seasons when doing a larger transform.
# `regions` maps a state name to the list of region names to convert.
regions = {#'Utah': ['Abajos', 'Logan', 'Moab', 'Ogden', 'Provo', 
           #'Salt Lake', 'Skyline', 'Uintas'],  
           #'Colorado': ['Grand Mesa Zone', 'Sangre de Cristo Range', 'Steamboat Zone', 'Front Range Zone',
           #'Vail Summit Zone', 'Sawatch Zone', 'Aspen Zone', 
           #'North San Juan Mountains', 'South San Juan Mountains', 'Gunnison Zone'],
           'Washington': ['Mt Hood', 'Olympics', 'Snoqualmie Pass', 'Stevens Pass',
           'WA Cascades East, Central', 'WA Cascades East, North', 'WA Cascades East, South',
           'WA Cascades West, Central', 'WA Cascades West, Mt Baker', 'WA Cascades West, South'
           ]
           }
# Seasons are 'YY-YY' strings (start year - end year); each one is also
# used as a directory name under the input and output folders.
seasons = ['15-16']#, '16-17', '17-18', '18-19']

In [9]:
# Input folder holding the filtered per-region netCDF files, and the output
# folder that will receive the Zarr stores. Both names embed the interpolation factor.
processed_path = f"{data_root}/3.GFSFiltered{interpolation}xInterpolation/"
zarr_base_path = f"{data_root}/4.GFSFiltered{interpolation}xInterpolationZarr/"

In [10]:
# Create the Zarr output directory (and any missing parents). exist_ok=True
# makes this safe to re-run and avoids the check-then-create race of
# testing os.path.exists() first.
os.makedirs(zarr_base_path, exist_ok=True)

In [11]:
def _season_dates(season):
    """Return the daily timestamps of one season, Nov 1 through Apr 30.

    Parameters
    ----------
    season : str
        'YY-YY' season label such as '15-16'. The first two digits are read
        as the 20YY year in which the season starts.

    Returns
    -------
    pandas.DatetimeIndex
        One entry per day: 182 days when the season contains Feb 29,
        otherwise 181.

    Raises
    ------
    ValueError
        If the first two characters of `season` are not digits.
    """
    start_year = 2000 + int(season[:2])
    # An explicit end date lets pandas account for leap years, instead of
    # hard-coding a day count and a start date per season.
    return pd.date_range(str(start_year) + '-11-01',
                         str(start_year + 1) + '-04-30',
                         freq="D")


def compute_region(region_name, season, state):
    """Convert one region's daily netCDF files for a season into a Zarr store.

    For each day of the season the file
    ``<processed_path><season>//Region_<region_name>_<YYYYMMDD>.nc`` is opened,
    its data variables are stacked into a single 'vars' array chunked one
    time step / latitude / longitude per chunk (all variables together), and
    the result is written or appended along 'time' to
    ``<zarr_base_path><season>/<state>/Region_<region_name>.zarr``.

    If the store already exists and its last time value equals the last day
    of the season, nothing is done. If it exists but is incomplete, only the
    days not yet present in the store are appended.

    Parameters
    ----------
    region_name : str
        Region name as used in the input file names.
    season : str
        'YY-YY' season label, e.g. '15-16'.
    state : str
        State name; only used to build the output path.

    Returns
    -------
    None
        Progress and problems are reported with print(). Missing input files
        are skipped; a ValueError while writing stops this region.

    Raises
    ------
    ValueError
        If `season` does not start with two digits.
    """
    first = True
    # NOTE: `state` is not part of the input path; the doubled '/' is kept
    # as in the original layout and is collapsed by POSIX path handling.
    base_path = processed_path + season + '/' + '/Region_' + region_name
    zarr_path = zarr_base_path + season + '/' + state + '/Region_' + region_name + '.zarr'

    date_values_pd = _season_dates(season)

    # Find out which time steps, if any, an earlier run already stored.
    existing_times = None
    try:
        with xr.open_zarr(zarr_path) as z:
            existing_times = z.time.values
    except (ValueError, FileNotFoundError):
        # The store does not exist yet. Which exception is raised depends
        # on the installed xarray/zarr versions, so both are treated alike.
        pass

    if existing_times is not None:
        if existing_times[-1] == date_values_pd[-1]:
            print(' already exists: ' + region_name + ' ' + season + ' ' + state)
            return
        # Already exists but is incomplete: keep only the days still missing.
        date_values_pd = [pd.Timestamp(v)
                          for v in date_values_pd.values.astype('datetime64[ns]')
                          if v not in existing_times]
        print(' some exist but have to complete ' + str(len(date_values_pd)))
        first = False

    for d in date_values_pd:
        path = base_path + '_' + d.strftime('%Y%m%d') + '.nc'
        print('On ' + str(path.split('/')[-1]))

        try:
            source = xr.open_dataset(path, chunks={'latitude':1, 'longitude':1})
        except OSError:
            print(' missing file: ' + path)
            continue

        # Close every input file once it has been written out, so a full
        # season does not accumulate open file handles.
        with source:
            ds = source.to_array(name='vars').chunk(
                {'time':1, 'latitude':1, 'longitude':1, 'variable':-1}).to_dataset()

            try:
                if first:
                    # The first write creates the store; later ones append.
                    ds.to_zarr(zarr_path, consolidated=True)
                    first = False
                else:
                    ds.to_zarr(zarr_path, consolidated=True, append_dim='time')
            except ValueError as err:
                print('Value Error on ' + zarr_path + ': ' + str(err))
                return

def process_tuple(t):
    """Run compute_region for one (region_name, season, state) work item.

    Exists so that a single tuple can be handed to a parallel worker.
    """
    region_name, season, state = t[0], t[1], t[2]
    compute_region(region_name, season, state)

In [16]:
def make_list():
    """Build the list of (region, season, state) tuples to convert.

    Iterates over the configured `seasons`, then the states in `regions`,
    then each state's regions, so the result is ordered season-major.
    """
    return [
        (region, season, state)
        for season in seasons
        for state in regions.keys()
        for region in regions[state]
    ]
    
# Build the work list once; the bare name on the last line makes the
# notebook display it so the configuration can be checked before the long run.
l = make_list()     
l

[('Mt Hood', '15-16', 'Washington'),
 ('Olympics', '15-16', 'Washington'),
 ('Snoqualmie Pass', '15-16', 'Washington'),
 ('Stevens Pass', '15-16', 'Washington'),
 ('WA Cascades East, Central', '15-16', 'Washington'),
 ('WA Cascades East, North', '15-16', 'Washington'),
 ('WA Cascades East, South', '15-16', 'Washington'),
 ('WA Cascades West, Central', '15-16', 'Washington'),
 ('WA Cascades West, Mt Baker', '15-16', 'Washington'),
 ('WA Cascades West, South', '15-16', 'Washington')]

In [17]:
#one state & season takes about 6 hours with 15 cores on my machine
# Fan the work items out over 15 worker processes, one region/season per task.
Parallel(n_jobs=15, backend="multiprocessing")(
    delayed(process_tuple)(item) for item in l
)






On Region_Snoqualmie Pass_20151101.ncOn Region_Olympics_20151101.nc




On Region_Stevens Pass_20151101.ncOn Region_Mt Hood_20151101.ncOn Region_WA Cascades East, Central_20151101.nc
On Region_WA Cascades East, South_20151101.ncOn Region_WA Cascades West, Mt Baker_20151101.nc

On Region_WA Cascades East, North_20151101.ncOn Region_WA Cascades West, Central_20151101.nc
On Region_WA Cascades West, South_20151101.nc





On Region_Stevens Pass_20151102.nc
On Region_Mt Hood_20151102.nc
On Region_Snoqualmie Pass_20151102.nc
On Region_Stevens Pass_20151103.nc
On Region_Mt Hood_20151103.nc
On Region_Stevens Pass_20151104.nc
On Region_Mt Hood_20151104.nc
On Region_Snoqualmie Pass_20151103.nc
On Region_Olympics_20151102.nc
On Region_Stevens Pass_20151105.nc
On Region_Mt Hood_20151105.nc
On Region_WA Cascades West, Mt Baker_20151102.nc
On Region_WA Cascades East, South_20151102.nc
On Region_Snoqualmie Pass_20151104.nc
On Region_Stevens Pass_20151106.nc
On Region_Mt Hood_20151106.nc
On Regio

On Region_Mt Hood_20151227.nc
On Region_Stevens Pass_20151227.nc
On Region_Snoqualmie Pass_20151207.nc
On Region_Mt Hood_20151228.nc
On Region_Stevens Pass_20151228.nc
On Region_WA Cascades West, Central_20151110.nc
On Region_Mt Hood_20151229.nc
On Region_WA Cascades East, South_20151113.nc
On Region_Snoqualmie Pass_20151208.nc
On Region_Stevens Pass_20151229.nc
On Region_WA Cascades West, Mt Baker_20151113.nc
On Region_Mt Hood_20151230.nc
On Region_Stevens Pass_20151230.nc
On Region_Olympics_20151116.nc
On Region_Snoqualmie Pass_20151209.nc
On Region_Mt Hood_20151231.nc
On Region_Stevens Pass_20151231.nc
On Region_WA Cascades East, Central_20151109.nc
On Region_WA Cascades West, South_20151109.nc
On Region_Stevens Pass_20160101.nc
On Region_Mt Hood_20160101.nc
On Region_Snoqualmie Pass_20151210.nc
On Region_Stevens Pass_20160102.nc
On Region_Mt Hood_20160102.nc
On Region_WA Cascades East, South_20151114.nc
On Region_Snoqualmie Pass_20151211.nc
On Region_WA Cascades West, Mt Baker_2015

On Region_Mt Hood_20160222.nc
On Region_Stevens Pass_20160222.nc
On Region_WA Cascades West, Central_20151119.nc
On Region_Mt Hood_20160223.nc
On Region_Olympics_20151130.nc
On Region_Stevens Pass_20160223.nc
On Region_Snoqualmie Pass_20160113.nc
On Region_WA Cascades East, South_20151125.nc
On Region_Mt Hood_20160224.nc
On Region_WA Cascades West, Mt Baker_20151125.nc
On Region_Stevens Pass_20160224.nc
On Region_Snoqualmie Pass_20160114.nc
On Region_Mt Hood_20160225.nc
On Region_Stevens Pass_20160225.nc
On Region_Mt Hood_20160226.nc
On Region_Snoqualmie Pass_20160115.nc
On Region_Stevens Pass_20160226.nc
On Region_WA Cascades East, North_20151116.nc
On Region_Olympics_20151201.nc
On Region_Mt Hood_20160227.nc
On Region_Stevens Pass_20160227.nc
On Region_Snoqualmie Pass_20160116.nc
On Region_Mt Hood_20160228.nc
On Region_WA Cascades East, Central_20151117.nc
On Region_Stevens Pass_20160228.nc
On Region_WA Cascades East, South_20151126.nc
On Region_WA Cascades West, Mt Baker_20151126.nc

On Region_Mt Hood_20160419.nc
On Region_Stevens Pass_20160418.nc
On Region_WA Cascades East, North_20151123.nc
On Region_WA Cascades West, South_20151124.nc
On Region_WA Cascades East, South_20151207.nc
On Region_WA Cascades West, Mt Baker_20151207.nc
On Region_Mt Hood_20160420.nc
On Region_Stevens Pass_20160419.nc
On Region_Snoqualmie Pass_20160219.nc
On Region_Olympics_20151215.nc
On Region_Mt Hood_20160421.nc
On Region_Stevens Pass_20160420.nc
On Region_Snoqualmie Pass_20160220.nc
On Region_Mt Hood_20160422.nc
On Region_Stevens Pass_20160421.nc
On Region_Mt Hood_20160423.nc
On Region_Stevens Pass_20160422.nc
On Region_Snoqualmie Pass_20160221.nc
On Region_Olympics_20151216.nc
On Region_WA Cascades West, Central_20151129.nc
On Region_Mt Hood_20160424.nc
On Region_Stevens Pass_20160423.nc
On Region_WA Cascades West, Mt Baker_20151208.nc
On Region_WA Cascades East, South_20151208.nc
On Region_Snoqualmie Pass_20160222.nc
On Region_Mt Hood_20160425.nc
On Region_Stevens Pass_20160424.nc
O

On Region_Snoqualmie Pass_20160418.nc
On Region_WA Cascades East, South_20151227.nc
On Region_Olympics_20160108.nc
On Region_Snoqualmie Pass_20160419.nc
On Region_WA Cascades West, Mt Baker_20151227.nc
On Region_WA Cascades East, Central_20151207.nc
On Region_Snoqualmie Pass_20160420.nc
On Region_WA Cascades West, South_20151207.nc
On Region_Snoqualmie Pass_20160421.nc
On Region_Olympics_20160109.nc
On Region_WA Cascades West, Central_20151214.nc
On Region_WA Cascades East, South_20151228.nc
On Region_Snoqualmie Pass_20160422.nc
On Region_WA Cascades West, Mt Baker_20151228.nc
On Region_WA Cascades East, North_20151206.nc
On Region_Snoqualmie Pass_20160423.nc
On Region_Olympics_20160110.nc
On Region_Snoqualmie Pass_20160424.nc
On Region_WA Cascades East, Central_20151208.nc
On Region_WA Cascades East, South_20151229.nc
On Region_WA Cascades West, Mt Baker_20151229.nc
On Region_Snoqualmie Pass_20160425.nc
On Region_WA Cascades West, South_20151208.nc
On Region_WA Cascades West, Central_

On Region_WA Cascades East, South_20160126.nc
On Region_Olympics_20160213.nc
On Region_WA Cascades West, Central_20160105.nc
On Region_WA Cascades East, Central_20151226.nc
On Region_WA Cascades West, South_20151226.nc
On Region_Olympics_20160214.nc
On Region_WA Cascades West, Mt Baker_20160127.nc
On Region_WA Cascades East, South_20160127.nc
On Region_WA Cascades East, North_20151224.nc
On Region_WA Cascades West, Central_20160106.nc
On Region_Olympics_20160215.nc
On Region_WA Cascades East, Central_20151227.nc
On Region_WA Cascades West, Mt Baker_20160128.nc
On Region_WA Cascades East, South_20160128.nc
On Region_WA Cascades West, South_20151227.nc
On Region_Olympics_20160216.nc
On Region_WA Cascades East, North_20151225.nc
On Region_WA Cascades West, Mt Baker_20160129.nc
On Region_WA Cascades West, Central_20160107.nc
On Region_WA Cascades East, South_20160129.nc
On Region_Olympics_20160217.nc
On Region_WA Cascades East, Central_20151228.nc
On Region_WA Cascades West, Mt Baker_20160

On Region_WA Cascades West, Central_20160129.nc
On Region_WA Cascades East, North_20160112.nc
On Region_Olympics_20160323.nc
On Region_WA Cascades West, Mt Baker_20160227.nc
On Region_WA Cascades East, South_20160227.nc
On Region_WA Cascades West, South_20160115.nc
On Region_Olympics_20160324.nc
On Region_WA Cascades West, Central_20160130.nc
On Region_WA Cascades West, Mt Baker_20160228.nc
On Region_WA Cascades East, Central_20160116.nc
On Region_WA Cascades East, South_20160228.nc
On Region_WA Cascades East, North_20160113.nc
On Region_Olympics_20160325.nc
On Region_WA Cascades West, South_20160116.nc
On Region_WA Cascades West, Mt Baker_20160229.nc
On Region_WA Cascades West, Central_20160131.nc
On Region_WA Cascades East, South_20160229.nc
On Region_Olympics_20160326.nc
On Region_WA Cascades East, Central_20160117.nc
On Region_WA Cascades East, North_20160114.nc
On Region_WA Cascades West, Mt Baker_20160301.nc
On Region_WA Cascades East, South_20160301.nc
On Region_Olympics_2016032

On Region_WA Cascades West, Mt Baker_20160329.nc
On Region_WA Cascades West, South_20160204.nc
On Region_WA Cascades East, South_20160329.nc
On Region_WA Cascades East, North_20160201.nc
On Region_WA Cascades West, Mt Baker_20160330.nc
On Region_WA Cascades East, Central_20160205.nc
On Region_WA Cascades West, Central_20160223.nc
On Region_WA Cascades East, South_20160330.nc
On Region_WA Cascades West, South_20160205.nc
On Region_WA Cascades West, Mt Baker_20160331.nc
On Region_WA Cascades East, South_20160331.nc
On Region_WA Cascades East, North_20160202.nc
On Region_WA Cascades West, Central_20160224.nc
On Region_WA Cascades East, Central_20160206.nc
On Region_WA Cascades West, Mt Baker_20160401.nc
On Region_WA Cascades West, South_20160206.nc
On Region_WA Cascades East, South_20160401.nc
On Region_WA Cascades West, Central_20160225.nc
On Region_WA Cascades West, Mt Baker_20160402.nc
On Region_WA Cascades East, North_20160203.nc
On Region_WA Cascades East, Central_20160207.nc
On Regi

On Region_WA Cascades West, Central_20160324.nc
On Region_WA Cascades West, South_20160301.nc
On Region_WA Cascades East, North_20160226.nc
On Region_WA Cascades East, Central_20160302.nc
On Region_WA Cascades West, Central_20160325.nc
On Region_WA Cascades West, South_20160302.nc
On Region_WA Cascades East, North_20160227.nc
On Region_WA Cascades East, Central_20160303.nc
On Region_WA Cascades West, Central_20160326.nc
On Region_WA Cascades West, South_20160303.nc
On Region_WA Cascades West, Central_20160327.nc
On Region_WA Cascades East, North_20160228.nc
On Region_WA Cascades East, Central_20160304.nc
On Region_WA Cascades West, South_20160304.nc
On Region_WA Cascades West, Central_20160328.nc
On Region_WA Cascades East, North_20160229.nc
On Region_WA Cascades East, Central_20160305.nc
On Region_WA Cascades West, South_20160305.nc
On Region_WA Cascades West, Central_20160329.nc
On Region_WA Cascades East, Central_20160306.nc
On Region_WA Cascades East, North_20160301.nc
On Region_WA

On Region_WA Cascades West, South_20160416.nc
On Region_WA Cascades East, North_20160410.nc
On Region_WA Cascades East, Central_20160417.nc
On Region_WA Cascades West, South_20160417.nc
On Region_WA Cascades East, North_20160411.nc
On Region_WA Cascades East, Central_20160418.nc
On Region_WA Cascades West, South_20160418.nc
On Region_WA Cascades East, North_20160412.nc
On Region_WA Cascades East, Central_20160419.nc
On Region_WA Cascades West, South_20160419.nc
On Region_WA Cascades East, North_20160413.nc
On Region_WA Cascades East, Central_20160420.nc
On Region_WA Cascades West, South_20160420.nc
On Region_WA Cascades East, North_20160414.nc
On Region_WA Cascades East, Central_20160421.nc
On Region_WA Cascades West, South_20160421.nc
On Region_WA Cascades East, North_20160415.nc
On Region_WA Cascades East, Central_20160422.nc
On Region_WA Cascades West, South_20160422.nc
On Region_WA Cascades East, North_20160416.nc
On Region_WA Cascades East, Central_20160423.nc
On Region_WA Cascade

[None, None, None, None, None, None, None, None, None, None]