# Phenolopy

## Load packages

### Set up a dask cluster

In [None]:
%matplotlib inline
%load_ext autoreload

import os, sys
import xarray as xr
import numpy as np
import pandas as pd
import datacube
import matplotlib.pyplot as plt

from scipy.signal import savgol_filter, wiener
from scipy.stats import zscore
from statsmodels.tsa.seasonal import STL as stl
from datacube.drivers.netcdf import write_dataset_to_netcdf

sys.path.append('../Scripts')
from dea_datahandling import load_ard
from dea_dask import create_local_dask_cluster
from dea_plotting import display_map, rgb

sys.path.append('./scripts')
import phenolopy

In [None]:
# initialise the local dask cluster (helper imported from dea_dask above).
# paste url into dask panel for more info.
create_local_dask_cluster()

In [None]:
# open up a datacube connection, identifying this application as 'phenolopy'
dc = datacube.Datacube(app='phenolopy')

## Study area and data setup

### Set study area and time range

In [None]:
# centroids of the gdv project testing areas, stored as (lat, lon) i.e. (y, x)
loc_dict = dict(
    test_a=(-23.28043, 119.85931),
    test_b=(-31.60693, 116.94264),
)

# buffer applied around each centroid, stored as (x, y) i.e. (length, height)
buf_dict = dict(
    test_a=(0.025, 0.025),
    test_b=(0.025, 0.025),
)

In [None]:
# key of the test area to work on (must exist in loc_dict and buf_dict)
study_area = 'test_a'

# buffer for that area: first element is lon (x), second is lat (y)
lon_buff = buf_dict[study_area][0]
lat_buff = buf_dict[study_area][1]

# time range as (start, end). for a specific year, use that year with
# month 01 to 12. multiple years will be averaged.
time_range = ('2016-11', '2018-02')

In [None]:
# centroid of the selected study area: first element is lat, second is lon
lat = loc_dict[study_area][0]
lon = loc_dict[study_area][1]

# study boundary as (min, max) pairs: centroid minus/plus the buffer
lon_extent = (lon - lon_buff, lon + lon_buff)
lat_extent = (lat - lat_buff, lat + lat_buff)

# display onto interactive map
display_map(y=lat_extent, x=lon_extent)

### Load Sentinel-2A and Sentinel-2B data for the above parameters


In [None]:
# bands (measurements) to request
measurements = [
    'nbart_blue',
    'nbart_green',
    'nbart_red',
    'nbart_nir_1',
    'nbart_swir_2',
]

# query built from the extents, time range and bands above plus output settings
query = dict(
    x=lon_extent,
    y=lat_extent,
    time=time_range,
    measurements=measurements,
    output_crs='EPSG:3577',
    resolution=(-10, 10),
    group_by='solar_day',
)

# load sentinel 2a and 2b data as a dask-backed dataset, one time step per chunk
ds = load_ard(
    dc=dc,
    products=['s2a_ard_granule', 's2b_ard_granule'],
    min_gooddata=0.90,
    dask_chunks={'time': 1},
    **query
)

# display dataset
print(ds)

# optional: display rgb of the data temporally resampled to monthly medians
#rgb(ds.resample(time='1M').median(), bands=['nbart_red', 'nbart_green', 'nbart_blue'], col='time', col_wrap=12)

### Conform DEA band names

In [None]:
# takes our dask ds and conforms (renames) bands; the result replaces ds
ds = phenolopy.conform_dea_band_names(ds)

# display dataset to check the band names
print(ds)

### Calculate vegetation index

In [None]:
# takes our dask ds and calculates the 'mavi' veg index from spectral bands.
# NOTE(review): drop=True presumably removes the source bands afterwards -
# confirm against phenolopy.calc_vege_index
ds = phenolopy.calc_vege_index(ds, index='mavi', drop=True)

# display dataset
print(ds)

## Pre-processing phase

### Group data by month and reduce by median

In [None]:
# take our dask ds, group it by month and reduce each group with the median
ds = phenolopy.group(ds, group_by='month', reducer='median')

# display dataset
print(ds)

In [None]:
# run the pending dask computation and keep the computed result as ds
ds = ds.compute()

### Remove outliers from dataset on per-pixel basis

In [None]:
# re-chunk so the whole time axis sits in a single chunk (-1) to make the
# dataset compatible with this function
ds = ds.chunk({'time': -1})

# takes our dask ds and removes outliers from data using the median method.
# NOTE(review): z_pval is presumably only used by a z-score method - confirm
ds = phenolopy.remove_outliers(ds=ds, method='median', user_factor=2, z_pval=0.05)

# display dataset
print(ds)

### Resample dataset to weekly medians

In [None]:
# takes our dask ds and resamples data to weekly ('1W') medians
ds = phenolopy.resample(ds, interval='1W', reducer='median')

# display dataset
print(ds)

### Interpolate missing (i.e. nan) values linearly

In [None]:
# re-chunk so the whole time axis sits in a single chunk (-1) to make the
# dataset compatible with this function
ds = ds.chunk({'time': -1})

# takes our dask ds and interpolates missing (nan) values using the
# 'interpolate_na' method
ds = phenolopy.interpolate(ds=ds, method='interpolate_na')

# display dataset
print(ds)

### Smooth data on per-pixel basis

In [None]:
# re-chunk so the whole time axis sits in a single chunk (-1) to make the
# dataset compatible with this function
ds = ds.chunk({'time': -1})

# take our dask ds and smooth using the Savitzky-Golay filter
# (window length of 3, polynomial order of 1)
ds = phenolopy.smooth(ds=ds, method='savitsky', window_length=3, polyorder=1)

# display dataset
print(ds)

### Calculate number of seasons 

In [None]:
# re-chunk so the whole time axis sits in a single chunk (-1) to make the
# dataset compatible with this function
ds = ds.chunk({'time': -1})

# take our dask ds and calculate the number of seasons; the result is kept
# in its own variable and ds is left unchanged by this assignment
da_num_seasons = phenolopy.calc_num_seasons(ds=ds)

# display number of seasons result
print(da_num_seasons)

## Calculate Phenometrics

In [None]:
# compute: run the pending dask work on ds before calculating phenometrics,
# then display the computed dataset
ds = ds.compute()
print(ds)

In [None]:
# calc phenometrics via phenolopy! only the veg_index variable of ds is passed
ds_phenos = phenolopy.calc_phenometrics(
    da=ds['veg_index'],
    peak_metric='pos',
    base_metric='vos',
    method='seasonal_amplitude',
    factor=0.2,
    thresh_sides='two_sided',
    abs_value=0.1,
)

## Display phenometric

In [None]:
# set the metric you want to view (e.g. pos_values, pos_times, vos_values, vos_times, etc.)
metric_name = 'vos_values'

# plot the selected metric on a map using the reversed terrain colour map
ds_phenos[metric_name].plot(robust=True, cmap='terrain_r')