# Phenolopy

## Load packages

### Set up a dask cluster

In [1]:
%matplotlib inline
%load_ext autoreload

import os, sys
import xarray as xr
import numpy as np
import pandas as pd
import datacube
import matplotlib.pyplot as plt

from scipy.signal import savgol_filter, wiener
from scipy.stats import zscore
from statsmodels.tsa.seasonal import STL as stl
from datacube.drivers.netcdf import write_dataset_to_netcdf

sys.path.append('../Scripts')
from dea_datahandling import load_ard
from dea_dask import create_local_dask_cluster
from dea_plotting import display_map, rgb

sys.path.append('./scripts')
import phenolopy

In [2]:
# initialise the local dask cluster via the dea_dask helper. the client summary
# shown below the cell includes the dashboard url; paste it into the dask panel
# for more info.
create_local_dask_cluster()

0,1
Client  Scheduler: tcp://127.0.0.1:33243  Dashboard: /user/lewis/proxy/8787/status,Cluster  Workers: 1  Cores: 2  Memory: 14.18 GB


In [3]:
# open up a datacube connection. this handle is passed to load_ard further
# down to find and load the sentinel 2 data.
dc = datacube.Datacube(app='phenolopy')

## Study area and data setup

### Set study area and time range

In [4]:
# centroid of each gdv project testing area, stored as lat, lon (y, x)
loc_dict = dict(
    test_a=(-23.28043, 119.85931),
    test_b=(-31.60693, 116.94264),
)

# buffer length and height (x, y) added either side of each centroid,
# in the same units as the coordinates above
buf_dict = dict(
    test_a=(0.025, 0.025),
    test_b=(0.025, 0.025),
)

In [6]:
# testing area to process; must be a key of both loc_dict and buf_dict
study_area = 'test_a'

# buffer size for the chosen area: lon (x) first, then lat (y)
lon_buff = buf_dict[study_area][0]
lat_buff = buf_dict[study_area][1]

# time range to load. for a specific year, set same year with month 01 to 12.
# when several years are given they are combined by the month grouping step
# further down.
time_range = ('2015-11', '2020-02')

In [7]:
# centroid of the selected study area: lat (y) first, then lon (x)
lat = loc_dict[study_area][0]
lon = loc_dict[study_area][1]

# study boundary: centroid minus/plus the buffer along each axis
lon_extent = (lon - lon_buff, lon + lon_buff)
lat_extent = (lat - lat_buff, lat + lat_buff)

# display the boundary on an interactive map
display_map(x=lon_extent, y=lat_extent)

### Load Sentinel-2A and Sentinel-2B data for the above parameters


In [8]:
# spectral bands (measurements) to load
measurements = ['nbart_blue', 'nbart_green', 'nbart_red', 'nbart_nir_1', 'nbart_swir_2']

# query assembled from the study extent, time range and bands set above,
# plus the output grid settings
query = dict(
    x=lon_extent,
    y=lat_extent,
    time=time_range,
    measurements=measurements,
    output_crs='EPSG:3577',
    resolution=(-10, 10),
    group_by='solar_day',
)

# load sentinel 2a and 2b data as a dask-backed dataset (one chunk per time
# step), keeping only time steps with at least 90% good quality pixels
ds = load_ard(
    dc=dc,
    products=['s2a_ard_granule', 's2b_ard_granule'],
    min_gooddata=0.90,
    dask_chunks={'time': 1},
    **query
)

# display dataset
print(ds)



Finding datasets
    s2a_ard_granule
    s2b_ard_granule
Counting good quality pixels for each time step
Filtering to 140 out of 184 time steps with at least 90.0% good quality pixels
Applying pixel quality/cloud mask
Returning 140 time steps as a dask array
<xarray.Dataset>
Dimensions:       (time: 140, x: 559, y: 605)
Coordinates:
  * time          (time) datetime64[ns] 2015-11-21T02:04:12.031000 ... 2020-0...
  * y             (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x             (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref   int32 3577
Data variables:
    nbart_blue    (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    nbart_green   (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    nbart_red     (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    nbart_nir_1   (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    nbart_swir_

### Conform DEA band names

In [9]:
# takes our dask ds and conforms (renames) the DEA 'nbart_*' bands to the
# short band names used from here on (blue, green, red, nir, swir1)
ds = phenolopy.conform_dea_band_names(ds)

# display dataset
print(ds)

Conforming satellite bands
> Satellite band names conformed successfully.

<xarray.Dataset>
Dimensions:      (time: 140, x: 559, y: 605)
Coordinates:
  * time         (time) datetime64[ns] 2015-11-21T02:04:12.031000 ... 2020-02...
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
Data variables:
    blue         (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    green        (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    red          (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    nir          (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
    swir1        (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
Attributes:
    crs:           EPSG:3577
    grid_mapping:  spatial_ref


### Calculate vegetation index

In [10]:
# takes our dask ds and calculates the 'mavi' veg index from spectral bands.
# drop=True drops the source bands, leaving only the 'veg_index' variable.
ds = phenolopy.calc_vege_index(ds, index='mavi', drop=True)

# display dataset
print(ds)

Generating vegetation index: mavi
> Drop bands set to True. Dropping these bands: ['blue', 'green', 'red', 'nir', 'swir1']
> Vegetation index calculated successfully.

<xarray.Dataset>
Dimensions:      (time: 140, x: 559, y: 605)
Coordinates:
  * time         (time) datetime64[ns] 2015-11-21T02:04:12.031000 ... 2020-02...
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>
Attributes:
    crs:           EPSG:3577
    grid_mapping:  spatial_ref


## Pre-processing phase

### Group data by month and reduce by median

In [11]:
# take our dask ds, group all time steps by calendar month and reduce each
# group to its median (12 time steps, re-labelled onto a single year)
ds = phenolopy.group(ds, group_by='month', reducer='median')

# display dataset
print(ds)

Group dataset interval: month via reducer: median
> Selecting year: 2018 to re-label times after groupby.
> Group successful.

<xarray.Dataset>
Dimensions:      (time: 12, x: 559, y: 605)
Coordinates:
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
  * time         (time) datetime64[ns] 2018-01-01 2018-02-01 ... 2018-12-01
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(1, 605, 559), meta=np.ndarray>


### Remove outliers from dataset on per-pixel basis

In [12]:
# chunk dask to -1 (one chunk spanning the whole time axis) to make compatible
# with this function
ds = ds.chunk({'time': -1})

# takes our dask ds and remove outliers from data using median method.
# NOTE(review): z_pval presumably only matters for a z-score based method and
# is ignored when method='median' - confirm against phenolopy.remove_outliers
ds = phenolopy.remove_outliers(ds=ds, method='median', user_factor=2, z_pval=0.05)

# display dataset
print(ds)

Outlier removal method: median with a user factor of: 2
> Generated roll window size less than 3, setting to default (3).
> Outlier removal successful.

<xarray.Dataset>
Dimensions:      (time: 12, x: 559, y: 605)
Coordinates:
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
  * time         (time) datetime64[ns] 2018-01-01 2018-02-01 ... 2018-12-01
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(12, 605, 559), meta=np.ndarray>


### Resample dataset to weekly medians

In [13]:
# takes our dask ds and resamples data to weekly ('1W') medians. this turns
# the 12 monthly time steps into weekly ones; weeks without a value are left
# for the interpolation step.
ds = phenolopy.resample(ds, interval='1W', reducer='median')

# display dataset
print(ds)

Resampling dataset interval: 1W via reducer: median
> Resample successful.

<xarray.Dataset>
Dimensions:      (time: 48, x: 559, y: 605)
Coordinates:
  * time         (time) datetime64[ns] 2018-01-07 2018-01-14 ... 2018-12-02
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(4, 605, 559), meta=np.ndarray>


### Interpolate missing (i.e. nan) values linearly

In [14]:
# chunk dask to -1 (one chunk spanning the whole time axis) to make compatible
# with this function
ds = ds.chunk({'time': -1})

# takes our dask ds and fills missing (nan) values using the 'interpolate_na'
# method
ds = phenolopy.interpolate(ds=ds, method='interpolate_na')

# display dataset
print(ds)

Interpolating dataset using method: interpolate_na.
> Interpolation successful.

<xarray.Dataset>
Dimensions:      (time: 48, x: 559, y: 605)
Coordinates:
    spatial_ref  int32 3577
  * time         (time) datetime64[ns] 2018-01-07 2018-01-14 ... 2018-12-02
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(48, 605, 559), meta=np.ndarray>


### Smooth data on per-pixel basis

In [15]:
# chunk dask to -1 (one chunk spanning the whole time axis) to make compatible
# with this function
ds = ds.chunk({'time': -1})

# take our dask ds and smooth each pixel's time series using a Savitzky-Golay
# filter (window of 3 time steps, polynomial order 1)
ds = phenolopy.smooth(ds=ds, method='savitsky', window_length=3, polyorder=1)

# display dataset
print(ds)

Smoothing method: savitsky with window length: 3 and polyorder: 1.
> Smoothing successful.

<xarray.Dataset>
Dimensions:      (time: 48, x: 559, y: 605)
Coordinates:
  * time         (time) datetime64[ns] 2018-01-07 2018-01-14 ... 2018-12-02
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 dask.array<chunksize=(), meta=np.ndarray>
Data variables:
    veg_index    (time, y, x) float32 dask.array<chunksize=(48, 605, 559), meta=np.ndarray>


### Calculate number of seasons 

In [16]:
# chunk dask to -1 (one chunk spanning the whole time axis) to make compatible
# with this function
ds = ds.chunk({'time': -1})

# take our dask ds and calculate the number of seasons for each pixel; the
# result is a 2d (y, x) data array named 'num_seasons'
da_num_seasons = phenolopy.calc_num_seasons(ds=ds)

# display data array
print(da_num_seasons)

Beginning calculation of number of seasons.
> Calculating number of seasons.
> Success!

<xarray.DataArray 'num_seasons' (y: 605, x: 559)>
dask.array<transpose, shape=(605, 559), dtype=int16, chunksize=(605, 559), chunktype=numpy.ndarray>
Coordinates:
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 dask.array<chunksize=(), meta=np.ndarray>


## Calculate Phenometrics

In [17]:
# compute: evaluate the lazy dask operations queued above and load the
# resulting values into memory before calculating phenometrics
ds = ds.compute()
print(ds)

<xarray.Dataset>
Dimensions:      (time: 48, x: 559, y: 605)
Coordinates:
  * time         (time) datetime64[ns] 2018-01-07 2018-01-14 ... 2018-12-02
  * y            (y) float64 -2.565e+06 -2.565e+06 ... -2.571e+06 -2.571e+06
  * x            (x) float64 -1.231e+06 -1.231e+06 ... -1.225e+06 -1.225e+06
    spatial_ref  int32 3577
Data variables:
    veg_index    (time, y, x) float32 0.10106007 0.112709396 ... 0.08980583


In [None]:
# calc phenometrics via phenolopy! runs on the in-memory 'veg_index' data
# array; one keyword per line so each setting is easy to find and tweak.
ds_phenos = phenolopy.calc_phenometrics(
    da=ds['veg_index'],
    peak_metric='pos',
    base_metric='vos',
    method='seasonal_amplitude',
    factor=0.2,
    thresh_sides='two_sided',
    abs_value=0.1,
)

## Display phenometric

In [None]:
# set the metric you want to view (e.g. pos_values, pos_times, vos_values, vos_times, etc.)
metric_name = 'vos_values'

# plot this metric as a map. robust=True is passed through to xarray's plot
# call along with the reversed 'terrain' colormap.
ds_phenos[metric_name].plot(robust=True, cmap='terrain_r')