# Peru Gold Mining - Madre de Dios Region

Example of how to use Sentinel-1 data to look at changes over time in a cloudy region. Here we use data cubes of Sentinel-1 data, processed by Earth Big Data onto the same tiling system used by Sentinel-2, and stored in Zarr format with 20 time steps in each chunk.

Authors: Josef Kellndorfer (Earth Big Data) and Rich Signell (USGS)

#### Import modules

In [None]:
import os
import xarray as xr
import numpy as np
import pandas as pd
import fsspec
import hvplot.xarray
import panel as pn
import holoviews as hv
import satsearch

In [None]:
# import logging
# logging.getLogger("param.main").setLevel(logging.CRITICAL)
# Install an "ignore" filter so warnings raised from here on are not shown.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# User defined functions
def time_label_from_idx(ds, idx):
    """Build a label listing the dates of the selected time steps.

    For every position in ``idx`` the matching time step is taken from ``ds``
    with ``isel`` and the text of its ``time`` value up to the first ``'T'``
    is kept. Each entry is followed by one space, so a non-empty label ends
    with a trailing space; an empty ``idx`` gives ``''``.
    """
    def _date_part(position):
        # Text form of the time value, cut at the date/time separator.
        step = ds.isel({'time': position})
        return f'{step.time.values}'.split('T')[0]

    return ''.join(f'{_date_part(position)} ' for position in idx)

#### Create or connect to a Dask Gateway Cluster

In [None]:
from dask_gateway import Gateway
from dask.distributed import Client

gateway = Gateway()
# Ask the gateway for its clusters once and reuse the answer, so the
# "is anything running?" check and the printed listing describe the same
# snapshot instead of coming from separate queries.
clusters = gateway.list_clusters()
if clusters:
    print('Existing Dask clusters:')
    for c in clusters:
        print('Cluster Name:',c.name,c.status)
else:
    print('No Cluster running.')

In [None]:
# New or connect:
# If no cluster is running, create a new one; otherwise connect to the cluster
# at position idx in the gateway's listing (idx=0 is the first one found;
# change idx to attach to a different running cluster).
idx=0
# Query once so the emptiness check and the indexed lookup use the same listing.
running = gateway.list_clusters()
if not running:
    cluster = gateway.new_cluster(environment='pangeo', profile='Small Worker')
else:
    cluster = gateway.connect(running[idx].name)

In [None]:
# Ask the cluster to scale to 10 workers.
cluster.scale(10)

In [None]:
client = Client(cluster)     # Creating the cluster is not enough.  Dask always needs a client to attach to the cluster!

In [None]:
cluster

#### Search STAC using satsearch

In [None]:
es_url = 'https://earth-search.aws.element84.com/v0'

Search for all data 

In [None]:
# Date range (start/end) passed to the STAC search.
dates = '2017-01-01/2020-12-02'
# Query the Sentinel-2 L2A COG collection, restricted to the tile given by
# UTM zone 19, latitude band L, grid square CF.
results = satsearch.Search.search(
    url=es_url,
    collections=['sentinel-s2-l2a-cogs'],
    datetime=dates,
    query={
        'sentinel:utm_zone': {'eq': 19},
        'sentinel:latitude_band': {'eq': 'L'},
        'sentinel:grid_square': {'eq': 'CF'},
    },
)
print(f'{results.found()} items')

Search for all data with less than 5% clouds (only clear images)

In [None]:
# Date range (start/end) passed to the STAC search.
dates = '2017-01-01/2020-12-02'
# Same tile restriction as the unfiltered search, plus a cloud-cover
# filter that keeps only items reporting less than 5.
results = satsearch.Search.search(
    url=es_url,
    collections=['sentinel-s2-l2a-cogs'],
    datetime=dates,
    query={
        'eo:cloud_cover': {'lt': 5},
        'sentinel:utm_zone': {'eq': 19},
        'sentinel:latitude_band': {'eq': 'L'},
        'sentinel:grid_square': {'eq': 'CF'},
    },
)
print(f'{results.found()} items')

Since it's mostly cloudy in this region, let's use SAR data for time series

#### Data Cube from Sentinel 1
Cloud-friendly Zarr format on AWS S3, loaded into xarray

In [None]:
# S3 filesystem handle; requester_pays=True is forwarded to the S3 backend.
fs = fsspec.filesystem('s3', requester_pays=True)

# Pieces of the data-cube location; they are combined later as
# zarrPrefix + tile + '/' + nameres.
zarrPrefix='s3://esip-qhub/ebd/sentinel-1-l22/zarr/mgrs/'
tile='19LCF'
nameres='20m'

In [None]:
# List everything stored under <prefix><tile>/<resolution> and print each
# entry as a full s3:// URL.
zfiles = fs.ls(f'{zarrPrefix}{tile}/{nameres}')
print('Zarr File')
for zfile in zfiles:
    print(f's3://{zfile}')

#### Open the files and Scale the SAR data to calibrated $\gamma^{0}$ backscatter

To convert between dB, amplitude, and power units in SAR data:

$\gamma^0[dB] = 10*log_{10}(\gamma^0[power])$

$\gamma^0[dB] = 10*log_{10}(\gamma^0[amplitude]^2)$


We store the data as linearly scaled amplitude (DN) in 16-bit with a calibration factor of -83 dB according to

$\gamma^0[dB]=10*log_{10}(DN^2)-83$

Thus, in order to convert the data to power units (required for analysis when averaging or other mathematical operations are involved), we need to apply the following formula:

$\gamma^0[power] = DN^2*CAL$ with $CAL=10^{-8.3}$


In [None]:
# Calibration multiplier: 10^-8.3, i.e. the -83 dB factor in linear units.
CAL = np.power(10, -8.3)
dslist = []
for z in zfiles:
    # Open one Zarr store through a key/value mapper on the S3 filesystem.
    fsz = fs.get_mapper(z)
    ds = xr.open_zarr(fsz, consolidated=True)
    # The arithmetic below produces a new object, so remember the dataset
    # attributes now and put them back afterwards.
    attrs = ds.attrs.copy()
    # Scaled amplitude -> linear power backscatter: DN^2 * CAL.
    amplitude = ds.astype(np.float32, keep_attrs=True)
    ds = np.power(amplitude, 2) * CAL
    # Set data <= 0 to NaN.
    ds = ds.where(ds > 0)
    ds.attrs = attrs
    dslist.append(ds)

In [None]:
# Choose which of the opened data cubes to work with (position 3 in dslist).
idx = 3
ds=dslist[idx]

In [None]:
print(ds)

How many Gigabytes in size is this dataset?

In [None]:
ds.nbytes/1e9

#### Display with time displayed in a Selection Widget

In [None]:
# Colour limits for the image displays. The cubes hold linear power values
# (DN^2 * CAL), so only this small-valued range is relevant; an earlier
# (2000,8000) setting was overwritten on the very next line and never used
# (presumably it targeted the unscaled DN values - confirm before reviving).
clim=(0.001,0.3)

In [None]:
# Keep only the text after the last '=' in the dataset's crs attribute.
crs = ds.crs.split('=')[-1]
print(crs)

In [None]:
# Grayscale view of the whole cube over ESRI tiles. The time step is chosen
# with a drop-down (Select) widget whose default is the last time value.
ds.hvplot.image(x='x', y='y', cmap='gray', rasterize=True, clim=clim,
                xlabel='Easting [m]',ylabel='Northing [m]',
                fields={'time': {'default': ds.time.values[-1]}},
                frame_width=400, groupby='time', widgets={'time':pn.widgets.Select}, 
                crs=crs, tiles='ESRI',
                xformatter='%.0f', yformatter='%.0f', data_aspect=1, legend=True, padding=0.1)

#### Display a subset with time in an Animation (Scrubber) Widget

In [None]:
# Positional subset: x indices 1000 up to (not including) 5490,
# y indices 0 up to (not including) 2400.
ds_mines = ds.isel(x=slice(1000,5490), y=slice(0,2400))

In [None]:
# Grayscale view of the subset, grouped by time, with a scrubber widget
# placed at the bottom of the plot.
ds_mines.hvplot.image(x='x',y='y',cmap='gray',
            rasterize=True,clim=clim,xlabel='Easting [m]',
            ylabel='Northing [m]', frame_width=600,groupby='time',
            widget_type='scrubber',widget_location='bottom',
            xformatter='%.0f',yformatter='%.0f',data_aspect=1, padding=0.1)

#### Pick 1st image, midpoint image, and last image

In [None]:
# Positions of the first, middle and last time steps.
# ds.sizes is the dimension-name -> length mapping; it is used instead of
# ds.dims['time'] because the mapping behaviour of Dataset.dims is deprecated
# in recent xarray releases. Floor division gives the same midpoint as
# int(n/2) without a detour through float.
mid = ds.sizes['time'] // 2
idx=[0,mid,-1]
time1 = ds.isel({'time':idx[0]})
time2 = ds.isel({'time':idx[1]})
time3 = ds.isel({'time':idx[2]})

In [None]:
# Date part (the text before the first 'T') of the first and last
# selected time values, used as plot labels.
label_first, label_last = (
    f'{step.time.values}'.split('T')[0] for step in (time1, time3)
)

In [None]:
# Load the first and last time steps into memory and build one grayscale
# image for each, labelled with its date.
first = time1.load().hvplot.image(x='x',y='y',cmap='gray',rasterize=True,clim=clim,xlabel='Easting [m]',ylabel='Northing [m]',
                                  label=label_first,frame_width=300,data_aspect=1)
last = time3.load().hvplot.image(x='x',y='y',cmap='gray',rasterize=True,clim=clim,xlabel='Easting [m]',ylabel='Northing [m]',
                                 label=label_last,frame_width=300,data_aspect=1)

#### plot first and last side by side

In [None]:
(first + last)

#### plot change as RGB, where yellow is change during 1st period, red is change during 2nd period

In [None]:
# Select all time positions listed in idx in a single call.
RGB=ds.isel({'time':idx})

In [None]:
print(RGB)

In [None]:
# Force every value into the range [0, 1] before the RGB display.
rgb=RGB
rgb=rgb.where(rgb>0,0)  # keep values > 0; everything else (NaN included) becomes 0
rgb=rgb.where(rgb<1,1)  # keep values < 1; everything else becomes 1

In [None]:
# Load the clamped selection and show it as an RGB composite; the plot label
# lists the dates of the time steps in idx.
rgb.load().hvplot.rgb(rasterize=True,x='x',y='y',xlabel='Easting [m]',ylabel='Northing [m]',
            label='RGB: '+time_label_from_idx(ds,idx),frame_width=500,
            xformatter='%.0f',yformatter='%.0f',data_aspect=1)

## Extract interactive time series by clicking on map:
https://discourse.holoviz.org/t/simple-panel-example-of-map-time-series-interaction-for-data-cube/1485/2


In [None]:
clim = (0.01, 0.3)

In [None]:
# Grayscale image of the subset, grouped by time and defaulting to the last
# time value. It is kept in a variable because it later serves as the source
# of the tap stream and as the top element of the dashboard.
image=ds_mines.hvplot.image(x='x', y='y', cmap='gray', rasterize=True, clim=clim,
                    xlabel='Easting [m]', ylabel='Northing [m]', frame_width=600, 
                    groupby='time', fields={'time': {'default': ds.time.values[-1]}},
                    xformatter='%.0f', yformatter='%.0f', data_aspect=1, legend=True)

In [None]:
def timeseries(x, y):
    """Overlay the dB time series of every data cube at one map position.

    For each dataset in ``dslist`` the cell nearest to (``x``, ``y``) is
    selected and converted with ``10*log10``, then plotted against time with
    a fixed y-range of -25 to 0. Each curve is labelled with the 3rd to 5th
    underscore-separated parts of the dataset's first data-variable name.

    Returns a ``hv.Overlay`` of all curves with the legend on the right.
    """
    def _curve(cube):
        # Point extraction and conversion to decibels.
        point_db = 10*np.log10(cube.sel(x=x, y=y, method='nearest'))
        first_var = list(cube.data_vars.variables.keys())[0]
        curve_label = ' '.join(first_var.split('_')[2:5])
        return point_db.hvplot('time', ylim=[-25,0], label=curve_label, legend='left',
                               ylabel='gamma_naught [dB]', xlabel='Date', grid=True)

    curves = [_curve(cube) for cube in dslist]
    return hv.Overlay(curves).opts(show_legend=True, legend_position='right')

In [None]:
# Tap stream attached to the image; x and y start at 0.
stream = hv.streams.Tap(source=image, x=0, y=0)

Note: Once we create the stream, it seems like any changes require restarting the kernel.  We had hoped that clearing the stream would help, but it does not seem to. 

In [None]:
# Column layout: the image on top, and below it the timeseries function bound
# to the stream's x and y parameters.
viz = pn.Column(image,pn.bind(timeseries, x=stream.param.x, y=stream.param.y))

In [None]:
viz.servable('Peru Gold Mining Dashboard')

Shut things down to be friendly:

In [None]:
client.close()

In [None]:
cluster.shutdown() 

In [None]:
cluster.status