In [1]:
# Basic plots
%matplotlib inline
import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = [12, 8]

# Common imports and settings
import os, sys
os.environ['USE_PYGEOS'] = '0'
from IPython.display import Markdown
import pandas as pd
pd.set_option("display.max_rows", None)
import xarray as xr

# Datacube
import datacube
from datacube.utils.rio import configure_s3_access
from datacube.utils import masking
from datacube.utils.cog import write_cog
# https://github.com/GeoscienceAustralia/dea-notebooks/tree/develop/Tools
from dea_tools.plotting import display_map, rgb
from dea_tools.datahandling import mostcommon_crs

# EASI defaults
easinotebooksrepo = '/home/jovyan/easi-notebooks'
if easinotebooksrepo not in sys.path: sys.path.append(easinotebooksrepo)
from easi_tools import EasiDefaults, xarray_object_size, notebook_utils

In [2]:
# Data tools
import numpy as np
from datetime import datetime

# Datacube
from datacube.utils import masking  # https://github.com/opendatacube/datacube-core/blob/develop/datacube/utils/masking.py
from odc.algo import enum_to_bool   # https://github.com/opendatacube/odc-tools/blob/develop/libs/algo/odc/algo/_masking.py
from odc.algo import xr_reproject   # https://github.com/opendatacube/odc-tools/blob/develop/libs/algo/odc/algo/_warp.py
from datacube.utils.geometry import GeoBox, box  # https://github.com/opendatacube/datacube-core/blob/develop/datacube/utils/geometry/_base.py

# Holoviews, Datashader and Bokeh
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
import panel as pn
import colorcet as cc
import cartopy.crs as ccrs
from datashader import reductions
from holoviews import opts
# import geoviews as gv
# from holoviews.operation.datashader import rasterize
hv.extension('bokeh', logo=False)

# Dask
from dask.distributed import Client, LocalCluster

In [3]:
easi = EasiDefaults()

family = 'sentinel-2'
product = easi.product(family)
display(Markdown(f'Default {family} product for "{easi.name}": [{product}]({easi.explorer}/products/{product})'))

Successfully found configuration for deployment "asia"


Default sentinel-2 product for "asia": [s2_l2a](https://explorer.asia.easi-eo.solutions/products/s2_l2a)

In [4]:
# Local cluster
# Default is to run on a compute node with 28 GiB of available memory and 8 cores.
# We'll make that explicit here .. but this should be adjusted based on your workflow

cluster = LocalCluster(n_workers=2, threads_per_worker=4)
cluster.scale(n=2, memory="14GiB")
client = Client(cluster)
display(client)

dashboard_address = notebook_utils.localcluster_dashboard(client=client,server=easi.hub)
display(dashboard_address)

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 8,Total memory: 124.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:36649,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 124.00 GiB

0,1
Comm: tcp://127.0.0.1:39715,Total threads: 4
Dashboard: http://127.0.0.1:44887/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:44811,
Local directory: /tmp/dask-scratch-space/worker-mvzrihyr,Local directory: /tmp/dask-scratch-space/worker-mvzrihyr

0,1
Comm: tcp://127.0.0.1:44473,Total threads: 4
Dashboard: http://127.0.0.1:45189/status,Memory: 62.00 GiB
Nanny: tcp://127.0.0.1:34831,
Local directory: /tmp/dask-scratch-space/worker-1_5t_nx5,Local directory: /tmp/dask-scratch-space/worker-1_5t_nx5


'https://hub.asia.easi-eo.solutions/user/dhnghia/proxy/8787/status'

In [5]:
dc = datacube.Datacube()

# Access AWS "requester-pays" buckets
# This is necessary for reading data from most third-party AWS S3 buckets such as for Landsat and Sentinel-2
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client);

In [12]:
from utils import load_data_geo
import geopandas as gpd
from deafrica_tools.areaofinterest import define_area
from datacube.utils.geometry import Geometry
import xarray as xr
train_path = "train/Soc Trang_Traning.shp"
train = load_data_geo(train_path)
train.head()

Unnamed: 0,Name,geometry
0,Water1,POINT (620223.628 1072681.101)
1,Water1,POINT (621057.521 1071074.602)
2,Water1,POINT (621891.602 1069430.621)
3,Water1,POINT (622762.564 1068048.689)
4,Water1,POINT (623256.484 1066777.765)


In [7]:
train = train.to_crs('EPSG:4326')

In [8]:
train.head()

Unnamed: 0,Name,geometry
0,Water1,POINT (106.09593 9.70220)
1,Water1,POINT (106.10348 9.68765)
2,Water1,POINT (106.11104 9.67276)
3,Water1,POINT (106.11893 9.66024)
4,Water1,POINT (106.12340 9.64873)


In [9]:
train.head().explore(column="Name", legend=True)

In [None]:
min_date = '2022-01-01' # 2021-11-01
max_date = '2022-02-01' # 2022-01-01
product = 's2_l2a'

loaded_datasets = {}

# Iterate over each point in the GeoDataFrame
for idx, point in train.iterrows():
    # Create a bounding box around the point
    aoi = define_area(lat=point.geometry.y, lon=point.geometry.x, buffer=0.0001)
    geopolygon = Geometry(aoi["features"][0]["geometry"], crs=train.crs)
    geopolygon_gdf = gpd.GeoDataFrame(geometry=[geopolygon], crs=train.crs)
    # Get the latitude and longitude range of the geopolygon
    lat_range = (geopolygon_gdf.total_bounds[1], geopolygon_gdf.total_bounds[3])
    lon_range = (geopolygon_gdf.total_bounds[0], geopolygon_gdf.total_bounds[2])
    query = {
            "product": product,
            "x": lon_range, 
            "y": lat_range,
            "time": (min_date, max_date),
    }
    ncrs = notebook_utils.mostcommon_crs(dc, query)
    # print(ncrs)
    query.update({
            "output_crs": ncrs,
            "resolution": (-10, 10),
            "dask_chunks": {'x': 2048, 'y': 2048}
     })
    # print(qr)
    data = dc.load(**qr)

    # Store the loaded dataset in the dictionary with a key based on the point index
    key = f'point_{idx + 1}'
    
    valid_mask = masking.valid_data_mask(data)
    
    measurement_info = dc.list_measurements().loc[query['product']]

    # Separate lists of measurement names and flag names
    measurement_names = measurement_info[ pd.isnull(measurement_info.flags_definition)].index
    flag_names        = measurement_info[pd.notnull(measurement_info.flags_definition)].index

    flags_def = masking.describe_variable_flags(data[flag]).values
    flags_def = flags_def.tolist()[0][1]
    flag_name = 'scl'
    flag_data = data[[flag_name]].where(valid_mask[flag_name]).persist()   # Dataset

    good_pixel_flags = [flags_def[str(i)] for i in [4, 5, 6]]

    good_pixel_mask = enum_to_bool(data[flag_name], good_pixel_flags)
    rs = []
    for layer_name in ['red', 'green', 'blue', 'nir']:

        # Get scaling and offset values from product description
        scale = measurement_info.loc[layer_name].scale_factor
        offset = measurement_info.loc[layer_name].add_offset

        # Apply valid mask and good pixel mask
        layer = data[[layer_name]].where(valid_mask[layer_name] & good_pixel_mask) * scale + offset
        layer = layer.persist()
        rs.append(layer)
    result = rs[0].merge(rs[1])
    result = result.merge(rs[2])
    result = result.merge(rs[3])
    
    loaded_datasets[key] = result

In [24]:
len(loaded_datasets)

1051

In [26]:
from deafrica_tools.bandindices import calculate_indices
import numpy as np

In [30]:
ndivi_dataset = {}
for i in loaded_datasets.keys():
    tmp = calculate_indices(loaded_datasets[i], index='NDVI', satellite_mission='s2')
    ndivi_dataset[i] = tmp.NDVI.mean(dim='time')

In [31]:
len(ndivi_dataset)

1051

In [None]:
ndivi_dataset['point_1'].plot(cmap='RdYlGn',
           size=6, vmin=-2, vmax=2,
col_wrap=2)