### Subsetter Panel App

Goal: interactively choose a small bounding box and download the corresponding subset for local work in QGIS

In [None]:
import xarray as xr
import rioxarray 
import os
import dask
import pandas as pd

import cmr

# visualization
import holoviews as hv
import hvplot.xarray
import panel as pn
import param

In [None]:
# Environment setup that does not need authentication.
# NOTE: streaming from the NSIDC server with GDAL REQUIRES a ~/.netrc file.
# GDAL issues the HTTP requests behind the scenes, so its performance-related
# options are passed through environment variables (shell equivalent:
# GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR GDAL_HTTP_COOKIEFILE=.urs_cookies GDAL_HTTP_COOKIEJAR=.urs_cookies)
env = {
    'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
    'GDAL_HTTP_COOKIEFILE': '.urs_cookies',
    'GDAL_HTTP_COOKIEJAR': '.urs_cookies',
    'GDAL_MAX_RAW_BLOCK_CACHE_SIZE': '200000000',
    'GDAL_SWATH_SIZE': '200000000',
    'VSI_CURL_CACHE_SIZE': '200000000',
}
# Export every setting into this process's environment.
os.environ.update(env)

In [None]:
def get_cmr_urls():
    """Return the GeoTIFF URLs that ``cmr.get_urls`` reports for NSIDC-0723 v3.

    The search parameters are fixed inside this function. No spatial filter is
    applied, and only file names matching ``'*gamma0*'`` are requested.

    Returns:
        list: the URLs from ``cmr.get_urls`` that end with ``'tif'``.
    """
    # Positional arguments for cmr.get_urls, in call order.
    query = (
        'NSIDC-0723',               # short_name
        '3',                        # version
        '2010-01-01T00:00:00Z',     # time_start (None was the commented-out alternative)
        '2022-10-05T15:43:33Z',     # time_end, originally chosen as "some far off time in the future"
        None,                       # bounding_box, e.g. '-54.85,69.31,-52.18,70.26'
        None,                       # polygon
        '*gamma0*',                 # filename_filter (None was the commented-out alternative)
    )
    # Keep only links whose path ends in 'tif'.
    return [link for link in cmr.get_urls(*query) if link.endswith('tif')]
    
# Resolve the list of GeoTIFF URLs once, at module level; the Stage2 methods
# (load_dataarray, plot_map, downloadVRT) read this global list.
assets = get_cmr_urls()

In [None]:
class Stage1(param.Parameterized):
    """Pipeline stage that collects NASA Earthdata credentials.

    Clicking the 'Enter Credentials' button triggers ``action``, which runs
    ``_write_netrc`` and stores the credentials in ``~/.netrc`` when that file
    does not exist yet.
    """

    # widget-linked variables
    username = param.String()
    password = param.String()
    action = param.Action(lambda x: x.param.trigger('action'), label='Enter Credentials')

    @param.depends('action', watch=True)
    def _write_netrc(self):
        """Create ``~/.netrc`` with the entered credentials if it is missing.

        An existing ``~/.netrc`` is never overwritten; only its permissions are
        set to ``0o600``. Nothing is written while either field is blank,
        because a blank entry would create the file and block every later
        attempt with real credentials.
        """
        # NOTE: event is from linked button
        # write .netrc file if it doesn't exist (on mybinder.org)
        netrc_path = os.path.join(os.path.expanduser('~'), '.netrc')
        if not os.path.exists(netrc_path):
            if not (self.username and self.password):
                return
            # Create the file owner-readable only from the start, so the
            # password is never on disk with default (umask) permissions.
            fd = os.open(netrc_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, 'w') as f:
                f.write(f'machine urs.earthdata.nasa.gov login {self.username} password {self.password}\n')
        # Also tighten permissions on a pre-existing file.
        os.chmod(netrc_path, 0o600)

    def view(self):
        """Return the static Markdown pane with the dataset description and instructions."""
        # The text does not depend on any widget value.
        text = pn.pane.Markdown('''
        ## MEaSUREs Greenland Image Mosaics from Sentinel-1A and -1B, Version 3
        *from Copernicus Sentinel-1A and -1B imaging satellites starting in January 2015*

        <a href="https://nsidc.org/data/nsidc-0723" target="_blank">Dataset technical reference (nsidc-0723)</a>

        <a href="http://epsg.io/3413" target="_blank">Map projection reference (EPSG:3413)</a>

        #### Instructions: 
        
        1. Enter your [NASA EarthData Login](https://urs.earthdata.nasa.gov)
        2. Click the 'Enter Credentials' button
        3. Click the 'Next' button in the upper right to load the Subsetter App
        ''', width=800)   
        return text

    def panel(self):
        """Return the stage layout: the instructions next to the credential widgets."""
        # Render the password parameter with a masked input instead of a plain text box.
        widgets = pn.panel(self.param,  widgets={'password': pn.widgets.PasswordInput})
        return pn.Row(self.view, widgets)

In [None]:
#stage1 = Stage1(name='NASA Earthdata credentials:')
#stage1.panel()

In [None]:
class Stage2(param.Parameterized):
    """Pipeline stage: select a bounding box on the mosaic, preview it, download a VRT.

    The overview map carries a box-select stream (``box``). Clicking
    'Get data!' triggers ``action``; the selected box is then clipped out of
    the time stack of all ``assets`` and shown with a scrubber widget, along
    with a download button for a VRT covering the same extent.

    NOTE(review): ``box`` and ``progress`` are class attributes, so every
    Stage2 instance shares them. Confirm this is intended when the app is
    served to several sessions.
    """

    # Initial selection box (minx, miny, maxx, maxy). A video is only built
    # once the user has moved the box away from this value.
    DEFAULT_BOUNDS = (-243538, -2295690, -149311, -2254858)

    counter = param.Number(default=0, precedence=-1) #invisible counter
    box = hv.streams.BoundsXY(bounds=DEFAULT_BOUNDS)
    progress = pn.widgets.Progress(name='Progress', active=False, width=150, bar_color='info')
    action = param.Action(lambda x: x.param.trigger('action'), label='Get data!')

    @dask.delayed
    def lazy_open(self, href, masked=True):
        """Open one remote raster as a single-step DataArray along ``time``.

        Args:
            href: URL of the raster. The second-to-last path component is
                parsed as the acquisition date and the last one is stored as
                the file name.
            masked: forwarded to ``rioxarray.open_rasterio``.

        Returns:
            The opened DataArray with ``band`` renamed to ``time`` and a
            ``filename`` coordinate. Because of ``dask.delayed``, calling this
            method returns a delayed object rather than the array itself.
        """
        parts = href.split('/')
        filename = parts[-1]
        date = parts[-2]
        da = rioxarray.open_rasterio(href, chunks=dict(band=1, y="auto", x=-1), masked=masked).rename(band='time')
        da['time'] = [pd.to_datetime(date)]
        da['filename'] = filename

        return da

    @param.depends('action')
    def toggle_progress(self):
        """Switch the progress indicator on and start the data retrieval.

        The very first invocation (``counter == 0``) only increments the
        counter, so no data is fetched before the user asks for it;
        presumably that first call is the initial render of this method in
        ``view``.
        """
        if self.counter != 0:
            self.progress.active = True
            self.plot_video() #want to not run initially
        self.counter +=1

    def load_dataarray(self):
        """Open every URL in ``assets`` and concatenate them along ``time`` into ``self.DA``."""
        # NOTE: can have server-size issues w/ NSIDC if going above 15 threads
        # if psutil.cpu_count() > 15: num_threads = 12
        with dask.config.set({'scheduler':'threads', 'num_workers':12}):
            dataArrays = dask.compute(*[self.lazy_open(href, masked=False) for href in assets])
        self.DA = xr.concat(dataArrays, dim='time', join='override', combine_attrs='drop')

    def plot_map(self):
        """Return the overview map of the last asset with the selection box overlaid."""
        #note full mosaic extent = (-625975, -3355975, 849975, -695025) #(minx, miny, maxx, maxy)

        # Read a reduced-resolution overview rather than the full image.
        da = rioxarray.open_rasterio(assets[-1], chunks=dict(band=1, y="auto", x=-1), 
                                     overview_level=2, masked=False).squeeze('band') 
        da = da.rename(dict(x='easting', y='northing'))
        img = da.hvplot.image(rasterize=True, cmap='gray', 
                              aspect='equal', frame_width=400,
                              title=os.path.basename(assets[-1]))
        # Attach the box-select stream to the image and draw its bounds in red.
        self.box.source = img
        bounds = hv.DynamicMap(lambda bounds: hv.Bounds(bounds), streams=[self.box]).opts(color='red')
        mapview = pn.Column(img * bounds) 

        return mapview

    def downloadVRT(self):
        """Build a VRT of all assets cropped to the selected box and return its text.

        Writes ``paths.txt`` and ``nsidc0723-subset.vrt`` in the current
        working directory and requires the ``gdalbuildvrt`` executable.

        Returns:
            io.StringIO: contents of the generated VRT file.

        Raises:
            subprocess.CalledProcessError: if ``gdalbuildvrt`` exits with an
                error. Raising here stops a stale VRT from an earlier run
                being served.
        """
        from io import StringIO
        import subprocess

        with open('paths.txt', 'w') as f:
            f.writelines('/vsicurl/' + href + '\n' for href in assets)

        xmin, ymin, xmax, ymax = self.box.bounds
        # An argument list avoids a shell; the extent is truncated to whole map units.
        cmd = ['gdalbuildvrt', '-overwrite',
               '-te', str(int(xmin)), str(int(ymin)), str(int(xmax)), str(int(ymax)),
               '-separate', '-input_file_list', 'paths.txt', 'nsidc0723-subset.vrt']
        subprocess.run(cmd, check=True)

        with open('nsidc0723-subset.vrt') as f:
            return StringIO(f.read())

    #@pn.depends(box.param.bounds)
    def plot_video(self):
        """Return the scrubber view and download button for the selected box.

        Returns ``None`` while the selection still equals ``DEFAULT_BOUNDS``.
        The progress indicator is switched off on every exit path, including
        the early return and errors, so it cannot stay active indefinitely.
        """
        data = self.box.bounds
        # only generate video after bbox selection change
        if data == self.DEFAULT_BOUNDS:
            self.progress.active = False
            return None

        try:
            # The full time stack is loaded once and reused by later calls.
            if not hasattr(self, 'DA'):
                self.load_dataarray()

            keys = ['minx','miny','maxx','maxy']
            bbox_dict = dict(zip(keys, data))
            subset = self.DA.rio.clip_box(**bbox_dict)
            video = subset.hvplot.image(x='x',y='y', 
                                    rasterize=True,
                                    cmap='gray', clim=(-25,5),
                                    aspect='equal', frame_width=800,
                                    widget_type='scrubber', widget_location='bottom') 

            # This index path is taken to reach the scrubber widget in the returned layout.
            widget = video[1][1][0] 
            widget.interval = 2000   #2 sec between frames 500 ms default

            download = pn.widgets.FileDownload(callback=self.downloadVRT, filename='nsidc0723-subset.vrt', width=300, align='center')

            return pn.Column(video, download)
        finally:
            self.progress.active = False

    def view(self):
        """Assemble the stage layout: instructions, button row, map and video."""
        text = pn.pane.Markdown('''
        ## MEaSUREs Greenland Image Mosaics from Sentinel-1A and -1B, Version 3
        *from Copernicus Sentinel-1A and -1B imaging satellites starting in January 2015*

        <a href="https://nsidc.org/data/nsidc-0723" target="_blank">Dataset technical reference (nsidc-0723)</a>

        <a href="http://epsg.io/3413" target="_blank">Map projection reference (EPSG:3413)</a>

        #### Instructions:
            
        1. Zoom into an area of interest, then use the box-select tool to isolate a small area of interest  
        1. *Be patient*, intially loading the data for your selected region can take a minute...
        1. The zoomed-in view on the right has a video scrubber at the bottom to go through each date
        ''', width=800)   
        widgets = pn.panel(self.param, show_labels=False, show_name=False, margin=0)
        button = pn.Row(widgets, self.toggle_progress, pn.Row(self.progress, 'Retrieving data...'))

        return pn.Column(text, button, pn.Row(self.plot_map, self.plot_video))

    # no parameters in this case...
    def panel(self):    
        """Return the stage layout (see ``view``)."""
        return self.view()

In [None]:
#stage2 = Stage2()
#stage2.panel()

In [None]:
# NOTE: for some reason, the image extent is off when it is rendered through the pipeline...
# Assemble the two-stage app: credentials first, then the subsetter.
auth_stage = Stage1(name='NASA EarthData Credentials')

pipeline = pn.pipeline.Pipeline()
pipeline.add_stage('Authenticate', auth_stage)
# The second stage is registered as a class, not an instance.
pipeline.add_stage('Visualize', Stage2)

# Mark the pipeline layout as servable.
pipeline.layout.servable()