## Analysis to investigate the median snow depth for ASO lidar data between 3m and 50m product
* 3m data: compiled netCDFwith 3m topo.nc 
    * units = mm
    * mask: applied (in compiling step)
* 50m data: ascii files with 50m hetch mask
    * units = m
    * mask: APPLY HERE!

NOTE: masks do not pefectly line up, not ideal, but for size of dataset this should still produce useful initial results

In [1]:
%matplotlib inline 

import numpy as np
import pandas as pd
from datetime import datetime
from scipy.stats import pearsonr 
import matplotlib.pyplot as plt
import glob
import os
import time
import xarray as xr


# import dask.array as da
# from dask.diagnostics import ProgressBar
# from dask.distributed import Client, LocalCluster

import matplotlib.patheffects as PathEffects

### load 3m data (xarray: compiled netCDF)

In [2]:
# fname = '~/Documents/projects/thesis/results/output/compiled_SUPERsnow.nc' #ARS
fname = '~/Documents/research/sierra/data/compiled_SUPERsnow.nc' #BSU

years = range(2013,2017) #2013-2017

#~~~~ ds full
ds = xr.open_dataset(fname,  chunks={'time':1,'x':1000,'y':1000})
ds.close 
ds

#~~~~~~~~~~~~~~ds subset
# dsubset = ds.sel(time=slice('2013','2017'))
# dsubset.close()

# ds = dsubset
# ds.close()
# ds

# dsmall = ds.sel(time='2018')
# dsmall.close()

# ds = dsmall
# ds.close()
# ds


<xarray.Dataset>
Dimensions:  (time: 51, x: 17569, y: 17002)
Coordinates:
  * x        (x) float32 254007.0 254010.0 254013.0 ... 306708.0 306711.0
  * y        (y) float32 4230327.0 4230324.0 4230321.0 ... 4179327.0 4179324.0
  * time     (time) datetime64[ns] 2013-04-03 2013-04-29 ... 2018-05-28
    mask     (y, x) uint8 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
    dem      (y, x) float32 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
    veg      (y, x) float32 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
Data variables:
    snow     (time, y, x) int16 dask.array<chunksize=(1, 1000, 1000), meta=np.ndarray>

In [3]:
# drop 4/1/2016  & 7-27-2019 (tmp because 50m version needs extent fixing....)
#dlist = map(np.datetime64, ['2016-04-01', '2017-07-27'])  #function, [list of things]
ds = ds.drop([np.datetime64('2016-04-01'), np.datetime64('2017-07-27') ], dim='time')
ds

  This is separate from the ipykernel package so we can avoid doing imports until


<xarray.Dataset>
Dimensions:  (time: 49, x: 17569, y: 17002)
Coordinates:
  * x        (x) float32 254007.0 254010.0 254013.0 ... 306708.0 306711.0
  * y        (y) float32 4230327.0 4230324.0 4230321.0 ... 4179327.0 4179324.0
  * time     (time) datetime64[ns] 2013-04-03 2013-04-29 ... 2018-05-28
    mask     (y, x) uint8 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
    dem      (y, x) float32 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
    veg      (y, x) float32 dask.array<chunksize=(1000, 1000), meta=np.ndarray>
Data variables:
    snow     (time, y, x) int16 dask.array<chunksize=(1, 1000, 1000), meta=np.ndarray>

### dataframes 3m
compute median for 3m data and store in pd.DataFrame

In [4]:
# 3m median
medians_3m=ds.snow.median(dim=('x', 'y'))
medians_3m = medians_3m.values/10

#m3 medians gt 0
medians_3m_gt0 = ds.snow.where(ds.snow > 0).median(dim=('x','y'))
medians_3m_gt0 = medians_3m_gt0.values/10

# three_m_df = pd.DataFrame(zip(medians_3m, medians_3m_gt0), columns = ['3m (cm)','3m>0 (cm)'], index = ds.time.dt.strftime('%Y-%m-%d'))
# three_m_df

NotImplementedError: median is not yet implemented on dask arrays

## load 50m data (ascii files: downloaded from AH's published dataset on zenodo and ARS snow server)

In [None]:
depth_50m_years = list(range(2013,2018))

# #~~~ARS
# mask and DEM
# mask_50m_path = '/home/meganmason/Documents/projects/thesis/data/processing_lidar/depths_50m/tuolx_hetchy_mask_50m.asc'
# dem_50m_path = '/home/meganmason/Documents/projects/thesis/data/processing_lidar/depths_50m/tuolx_dem_50m.asc'

# # ASO snow depth surfaces
# depth_50m_path = '/home/meganmason/Documents/projects/thesis/data/processing_lidar/depths_50m/ASO_50m_depth_surfaces/asc/*.asc'

#~~~BSU
base_path = '/Users/meganmason491/Documents/research/sierra/data/50m_analysis/'

# mask and DEM
mask_50m_path = base_path + 'tuolx_hetchy_mask_50m.asc'
dem_50m_path = base_path + 'tuolx_dem_50m.asc'

# ASO snow depth surfaces
depth_50m_path = base_path + 'asc/*.asc'

In [None]:
# mask and DEM
mask_50m = np.loadtxt(mask_50m_path, skiprows=6)
dem_50m = np.loadtxt(dem_50m_path, skiprows=6)

#ASo snow depth surfaces 
flist = glob.glob(depth_50m_path)
flist = sorted(flist)
print('Number of files as input:', len(flist))

flight_dates = []

for f in sorted(flist):
    dt_str = f.split("/")[-1] #splits on '/' and saves the last one
    dt_str = "".join([c for c in dt_str if c.isnumeric()]) #grabs numeric values for date info
    flight_dates.append(dt_str) #append to flight_date list

# flight_dates # flight dates as strings

flight_dates_dt = [datetime.strptime(flight_date, '%Y%m%d') for flight_date in flight_dates]


### dataframes 50m 
compute medians for 50m data and store in pd.DataFrame

In [None]:
def median_snow_depth(depth_array, convert_factor = 1e-3, mask_array = None, mask_value = None):
    '''
    Gets median snow depth for ASO snow depth surfaces
        args:
            depth_array: numpy array from asc files
            dx: cellsize [m] in x direction
            dy: cellsize [m] in y direction
            convert_factor: (optional, default is 0.001 from mm to m)
            mask_array: (Optional) mask of the area for depth calculation
                       If no mask is provided will calculate for entire array
            mask_value: (Optional) value of the mask to calculate over
                       Default value will be any positive integer
           Note: depth_array and mask_array should cover the same extent 
                 and have the same dx,dy
       return:
           median_depth: median depth [m] over the area in mask
                       or the full area in the array if no mask is provided
    '''
    
    if mask_array is None:
        pass

        
    else:
        median_depth = np.nanmedian(depth_array[[(mask_array == mask_value) & (depth_array >= 0)]] * convert_factor)
        median_depth_gt0 = np.nanmedian(depth_array[[(mask_array == mask_value) & (depth_array > 0)]] * convert_factor)
#         sca = (depth_array[[(mask_array == mask_value) & (depth_array > 0)]].size / mask_array[mask_array == mask_value].size) * 100
        
    return median_depth, median_depth_gt0

In [None]:
medians_50m = []
medians_50m_gt0 = []

for f in sorted(flist):
    
    depth_array = np.loadtxt(f, skiprows=6) #load asc files
    median_depth, median_depth_gt0 = median_snow_depth(depth_array, convert_factor=100, mask_array=mask_50m, mask_value=np.max(mask_50m)) #call median_snow_depth function
    
    medians_50m.append(median_depth)
    medians_50m_gt0.append(median_depth_gt0)

    
# fifty_m_df = pd.DataFrame(zip(medians_50m, medians_50m_gt0), columns = ['50m (cm)','50m>0 (cm)'], index = ds.time.dt.strftime('%Y-%m-%d'))
# fifty_m_df
# NOTE, index equals xarray 3m

zip up dataframes

In [None]:
# df = pd.DataFrame(zip(medians_3m_gt0.values/10, medians_50m_gt0, medians_3m.values/10, medians_50m, flight_dates_dt), 
#                   columns = ['3m>0 (cm)', '50m>0 (cm)', '3m (cm)','50m (cm)', 'flight_dates (50m)'], 
#                   index = ds.time.dt.strftime('%Y-%m-%d'))
# df

df = pd.DataFrame(zip(medians_3m_gt0, medians_50m_gt0, medians_3m, medians_50m), 
                  columns = ['3m>0 (cm)', '50m>0 (cm)', '3m (cm)','50m (cm)']) #, 
#                   index = flight_dates_dt)
# df


In [None]:
df

# plt.plot_date(df['flight_dates (50m)'], df.columns[:-1], fmt='x', tz=None, xdate=True, ydate=False)

In [None]:
df.plot()