# Script to help users define a configuration cell for ODC notebooks

*****

Geographical extent, time period, measurements, Coordinate Reference System (CRS) and resolution differ between the datasets available on the SDC.

This script helps you create a configuration cell that loads the data you want; the cell can be copy/pasted manually or loaded into an ODC Jupyter notebook. An overview of all datasets is also available on the website https://explorer.swissdatacube.org/products, and you can view the "measurements_SwissDC.csv" file within this folder for information on the available datasets.


In [None]:
# Import modules

# reload module before executing code
%load_ext autoreload
%autoreload 2

import pyproj
import numpy as np
import ipywidgets as widgets
import dask.distributed
from pystac_client import Client
import ast
from shapely.geometry import Polygon
from odc.stac import stac_load
import matplotlib.pyplot as plt
from odc import loader
import odc
from pyproj import Proj, transform
import pandas as pd
from sdc_utilities import *


# silence warning (not recommended during development)
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Connect to server
# Create a Dask distributed client (no address is given, so Dask's defaults apply)
client = dask.distributed.Client()

# This connects to the server and downloads a "catalog" that contains some metadata
catalog = Client.open("https://explorer.swissdatacube.org/stac")

# Fetch every available collection (product) once and keep the result in a
# list: ids and descriptions are both read from it, so the server is not
# queried again for each product, and `collections` stays reusable afterwards.
collections = list(catalog.get_collections())

# Names and descriptions of the products, in the order the catalog returns them
product_names = [collection.id for collection in collections]
descriptions = [collection.description for collection in collections]

# Print the full list of products with their descriptions as a table
df = pd.DataFrame({'products': product_names, 'description': descriptions})
print(df)


In [None]:
# Let the user pick one of the products listed in the catalog

# One radio button per product name; the choice is read from `product_sel.value`
product_sel = widgets.RadioButtons(
    options=product_names,
    disabled=False,
)
product_prompt = widgets.Label('Select a product and run the next cell: ')
display(product_prompt, product_sel)

In [None]:
# A cell in this notebook may rebind the name `str`, shadowing the built-in type; if so, restore the built-in before continuing.
if not isinstance(str, type) or str.__name__ != 'str':
    # The name `str` no longer refers to the built-in type: deleting the
    # global binding makes the built-in visible again.
    del str

# Load the measurement table and keep only the rows of the selected product
ds_selected = product_sel.value
df = pd.read_csv("measurements_SwissDC.csv")
dfs = df.loc[df['product'] == ds_selected]
if dfs.empty:
    print(f"Product '{ds_selected}' has no entry in measurements_SwissDC.csv.")

# Resolution pair built from the smallest value in the 'resolution' column:
# (negated minimum, minimum)
resolution = (-1 * dfs['resolution'].min(), dfs['resolution'].min())

no_dates_str = 'No date information available. Visit https://explorer.swissdatacube.org/products for more information. You can leave the date also out for now.'

# Start from "not available" so that both names are always defined, even when
# a column is missing or cannot be parsed as dates.
date_start = date_end = pd.NaT
try:
    date_start = pd.to_datetime(dfs['time_start']).min()
except (KeyError, TypeError, ValueError, OverflowError):
    pass  # reported once below, together with the end date
try:
    date_end = pd.to_datetime(dfs['time_end']).max()
except (KeyError, TypeError, ValueError, OverflowError):
    pass  # reported once below, together with the start date

if pd.isna(date_start) or pd.isna(date_end):
    # Covers both a failed lookup and columns without any valid date (NaT),
    # neither of which can be formatted with strftime.
    print(no_dates_str)
else:
    time = (date_start.strftime('%Y-%m-%d'), date_end.strftime('%Y-%m-%d'))

# measurements_aliases = ['QA_PIXEL', 'blue', 'green', 'red', 'nir', 'swir_1', 'swir_2']
# Build one "<measurement>: <aliases>" label per row, keyed by row position,
# and remember the last alias of every measurement.
out = {}
aliases_ = {}
for r, (_meas, _alias) in enumerate(zip(dfs['measurement'], dfs['aliases'])):
    # The 'aliases' column stores a Python list literal as text
    _alias = ast.literal_eval(_alias)
    out[r] = f"{_meas}: {_alias}"
    aliases_[r] = _alias[-1]

df = pd.DataFrame(list(out.values()), columns=['Bands'])

# Multi-selection widget listing the labels in sorted order
measur_sel = widgets.SelectMultiple(options=sorted(df['Bands']), disabled=False)
display(widgets.Label('Select measurements (displayed with their aliases) and run the next cell: '), measur_sel)


In [None]:
# Turn the widget selection into a measurements list, an aliases list and a hint message

measur_list = list(measur_sel.value)

# NOTE: from here on `measur_sel` is a plain list of measurement names and no
# longer the selection widget.
measur_sel = []
alias_sel = []
msg = ('# to make your live easier you can manually replace the measurements variable by \n'
       '# one of their alias:\n')

for entry in measur_list:
    # Every entry looks like "<measurement>: <list of aliases>"
    pieces = entry.split(': ')
    band_name = pieces[0]
    last_alias = ast.literal_eval(pieces[1])[-1]
    measur_sel.append(band_name)
    # QA_PIXEL and SCL are listed under their measurement name; every other
    # measurement is listed under its last alias.
    alias_sel.append(band_name if band_name in ("QA_PIXEL", "SCL") else last_alias)

print(f"Using the following measurements {measur_sel}")
print(f"Corresponding to the aliases     {alias_sel}")  # humanized band names

## Defining spatial extent

Open the website https://geojson.io/ and make your selection using the "rectangle" selection tool. The coordinates pop up on the right-hand side. Select the min and max values for the latitude and longitude. In the example this would be:

```
longitude =  (7.127, 7.199) 
latitude =  (46.773, 46.816)
```

Choose a <span style="color:red; font-size: 20px">***very small area*** </span>to test your workflow first, like in the example below.

Once you have identified the coordinates of your bounding box, enter them in the cell below.

![geojson.io example](https://www.dropbox.com/scl/fi/9b93k2tsmk9u3nw2e3n0l/geojson.jpeg?rlkey=k5o3axx60eddd5nv58dx50bij&dl=1)

In [None]:
# Product and measurements chosen in the cells above
product = ds_selected
measurements = measur_sel

# ---- Only change the extent in the following two lines ----
longitude = (7.127, 7.199)
latitude = (46.773, 46.816)
# -----------------------------------------------------------

# CRS of the extent above: lat/lon geographic coordinates
crs = 'epsg:4326'

# A projected coordinate system makes sense for analysis related to areas and
# at high latitudes (Switzerland is already above 45 degrees North).
# See https://epsg.io/2056
output_crs = 'epsg:2056'

# The final resolution can be defined here. The original resolution is already
# stored in the variable "resolution"; check it by making a new cell above and
# calling that variable.
# resolution = -100.0, 100.0

In [None]:
# Select time period

# Both pickers are pre-filled with the bounds of the period found for the product
start_date = widgets.DatePicker(
    description='Start date',
    value=date_start.date(),
    disabled=False,
)
end_date = widgets.DatePicker(
    description='End date',
    value=date_end.date(),
    disabled=False,
)

# Show the prompt above the two pickers, which sit side by side
period_prompt = widgets.Label('IF REQUIRED define time period (cannot be outside of the initial displayed time) and run the next cell:')
display(period_prompt, widgets.HBox([start_date, end_date]))

In [None]:
# Check defined time period

# Bounds of the period found for the selected product
first_day = date_start.date()
last_day = date_end.date()

# A cleared date picker has the value None, which cannot be compared with a date
if start_date.value is None or end_date.value is None:
    raise ValueError('Both start date and end date must be defined')

# Explicit exceptions are used instead of `assert`, so that the checks are
# still performed when Python runs with optimisations enabled (-O).
if start_date.value < first_day:
    raise ValueError('Start date cannot be defined before {}'.format(first_day))
if end_date.value > last_day:
    raise ValueError('End date cannot be defined after {}'.format(last_day))
if start_date.value > end_date.value:
    raise ValueError('End date is defined before start date')

# end_date = end_date.value + timedelta(days=1) # end_date is not inclusive !

print('Time period is OK')

In [None]:
##### Summarize the configuration parameters in a format ready to be copy/pasted to a new cell,
# and in a txt file to be loaded with the '%load config_cell.txt' magic.

# NOTE: the text is deliberately not stored in a variable named `str`; doing so
# would shadow the built-in `str` type for every cell executed afterwards.
config_text = f'''# Configuration

product = '{product_sel.value}'
measurements = {measur_sel}
aliases = {alias_sel}  # you can also provide only the aliases and get the measurements with:
# measurements, aliases = get_alias_band(aliases)
{msg}
longitude = ({longitude[0]}, {longitude[1]})
latitude = ({latitude[0]}, {latitude[1]})
crs = 'epsg:4326'

time = ('{start_date.value.strftime('%Y-%m-%d')}', '{end_date.value.strftime('%Y-%m-%d')}')
# the following date formats are also valid:
# time = ('2000-01-01', '2001-12-31')
# time=('2000-01', '2001-12')
# time=('2000', '2001')

# You can use an UTM zone according to the DataCube System.
# We prefer not to use this, instead specifying SwissGrid (epsg:2056).
# output_crs = 'epsg:2056'

output_crs = '{output_crs}'
resolution = {resolution[0]}, {resolution[1]}

# These are the pixel classifications for Sentinel (SCL) and Landsat (QA_PIXEL); 
# you can use values to mask out values that belong to certain classes

###################################
# SCL categories:                 #
#   0 - no data                   #
#   1 - saturated or defective    #
#   2 - dark area pixels          #
#   3 - cloud_shadows             #
#   4 * vegetation                #
#   5 * not vegetated             #
#   6 * water                     #
#   7 * unclassified              #
#   8 - cloud medium probability  #
#   9 - cloud high probability    #
#  10 - thin cirrus               #
#  11 * snow                      #
###################################

# Check for more detailed information: 
# - Landsat 8/9 (OLI/TIRS), Page 19:
# https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/LSDS-1619_Landsat8-9-Collection2-Level2-Science-Product-Guide-v6.pdf
# - Landsat 7 (ETM+), Page 15:
# https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/LSDS-1337_Landsat7ETM-C2-L2-DFCB-v6.pdf
# - Landsat 4,5 (TM), Page 18:
# https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/LSDS-1415_Landsat4-5-TM-C2-L1-DFCB-v3.pdf

#############################################
# QA_PIXEL BITS : CATEGORIES                #
#    0 : Fill                               #
#    1 : Clear                              #
#    2 : Water                              #
#    3 : Cloud shadow                       #
#    4 : Snow                               #
#    5 : Cloud                              #
#   10 : Terrain occlusion (Landsat 8 only) #
#############################################

chunks = {{"x": 2048, "y": 2048, "time": 1}}  # 2048 values are OK with ~21Gb memory available
'''

# Show the configuration so it can be copy/pasted ...
print(config_text)

# ... and store the same text in a file; the encoding is given explicitly
# rather than relying on the platform default.
with open('config_cell.txt', 'w', encoding='utf-8') as text_file:
    print(config_text, file=text_file)