In [None]:
import json
import os
import pickle
from pathlib import Path
from joblib import Parallel, delayed

import geopandas as gpd
import shapely
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import rioxarray

from xrspatial import hillshade
from datashader.colors import Set1
from datashader.transfer_functions import shade
from datashader.transfer_functions import stack
from datashader.transfer_functions import dynspread
from datashader.transfer_functions import set_background
from datashader.colors import Elevation

from xrspatial import focal, slope
import seaborn as sns
from tqdm import tqdm
from joblib_progress import joblib_progress
from xrspatial.multispectral import ndvi, savi
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay)
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.model_selection import RandomizedSearchCV as RSCV
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss

In [2]:
# paths -- everything is resolved relative to the project's data directory
# (the notebook's working directory is expected to be one level below the
# project root, as already assumed for the crown tiles)
helena_path = Path.cwd().parent / 'data' / 'helena'
treatment_path = helena_path / 'treatment_polys'
high_high_path = treatment_path / 'code15_n5.gpkg'
high_un_path = treatment_path / 'code12_n5.gpkg'
un_high_path = treatment_path / 'code3_n5.gpkg'
un_un_path = treatment_path / 'code0_n5.gpkg'
poly_paths = [high_high_path, high_un_path, un_high_path, un_un_path]

crown_path = helena_path / 'crowns'
# iterdir() yields files in arbitrary order; sort so the tile order (and the
# row order of everything derived from it) is reproducible between runs
crown_path_list = sorted(
    c for c
    in crown_path.iterdir()
    if c.suffix == '.gpkg'
    )

# open treatment polygons; ignore_index avoids duplicated index labels
# coming from the four separate files
df = pd.concat([gpd.read_file(p) for p in poly_paths], ignore_index=True)
df = df.drop('area_', axis=1)

We will first check each crown to see whether it (together with its buffer) falls completely within one of the treatment class areas. If so, it will be appended to a dataframe of crowns.


In [3]:
# number of joblib workers requested by the Parallel(...) calls in this notebook
n_jobs = 23

def is_in_treatment(crown_df, row, buf):
    '''
    Returns only crowns with buffer completely within polygon.

    crown_df : table of crowns (one row per crown); it is NOT modified.
    row      : a treatment polygon record exposing ``.geometry`` and
               ``.attribute`` (the treatment code written to the result).
    buf      : buffered crown geometries, aligned row-for-row with crown_df.

    Returns a new frame holding just the crowns whose buffer lies within
    ``row.geometry``, with their ``treatment`` column set to
    ``row.attribute``.  The frame is empty when no crown qualifies.
    '''
    inside = buf.within(row.geometry)

    # Work on a copy of the matching rows only.  Writing the label into the
    # shared crown_df would make later calls (other polygons of the same tile)
    # see -- and return again -- crowns that were labelled by earlier calls.
    labelled = crown_df.loc[inside].copy()
    labelled['treatment'] = row.attribute

    return labelled


def label_treatment(f):
    '''
    Label the crowns of one tile file with the treatment polygon they lie in.

    f : path of a crown tile readable by ``gpd.read_file``.

    Uses the module-level treatment polygons ``df`` and worker count
    ``n_jobs``.  Returns a list of frames, one per treatment polygon that
    intersects the tile's bounding box (as produced by ``is_in_treatment``).
    When no polygon intersects the tile the list holds a single empty frame
    with the tile's columns plus ``treatment``.
    '''
    crown_df = gpd.read_file(f)

    # get total bounds of tile as polygon
    bbox = shapely.geometry.box(*crown_df.total_bounds)

    # use only treatment geometries which touch the tile
    sub_df = df[df.geometry.intersects(bbox)]

    # add treatment column (-99 marks "not inside any treatment polygon")
    crown_df['treatment'] = -99

    if len(sub_df) == 0:
        # Nothing to label.  Return an empty slice of the tile itself rather
        # than a bare pandas DataFrame: it keeps the geometry column and the
        # column dtypes, so concatenating the results later still gives a
        # frame that can be written with .to_file().
        return [crown_df.iloc[:0]]

    # buffer crowns by 10 (CRS units -- presumably metres; confirm against the data)
    buf = crown_df.geometry.buffer(10)

    # label treatments of crowns lying completely within poly
    return Parallel(n_jobs=n_jobs)(
        delayed(is_in_treatment)(crown_df, row, buf)
        for _, row in sub_df.iterrows()
        )


In [5]:
# one labelling task per crown tile, run in parallel behind a progress bar
tile_tasks = (delayed(label_treatment)(f) for f in crown_path_list)
with joblib_progress('', total=len(crown_path_list)):
    results = Parallel(n_jobs=n_jobs)(tile_tasks)
    

Output()

In [11]:
# every tile contributed a list of frames: flatten those lists, then merge
labelled_frames = []
for tile_frames in results:
    labelled_frames.extend(tile_frames)
crown_df = pd.concat(labelled_frames)

In [None]:
    
# sequential alternative to the parallel cell above (tiles processed one by one)
results = [label_treatment(f) for f in tqdm(crown_path_list)]
# label_treatment returns a LIST of frames per tile, so flatten before
# concatenating -- pd.concat cannot take a list of lists
crown_df = pd.concat([frame for tile_frames in results for frame in tile_frames])

In [13]:
# preview the first rows of the labelled crowns
crown_df.head()

Unnamed: 0,IDdalponte,zmax,zmean,zsd,zskew,zkurt,zentropy,pzabovezmean,pzabove2,zq5,...,ipcumzq90,p1th,p2th,p3th,p4th,p5th,pground,n,area,geometry


In [None]:
# save the treatment-labelled crowns next to the other helena data
crown_df.to_file(helena_path / 'crowns_with_treatment_label.gpkg')

## Topographic Position Index
In order to look at the effects of slope position on tree mortality we will use an algorithm that replicates the Topographic Position Index (TPI). 

$$
\text{TPI} = round((\text{DEM} - \text{focalmean}(\text{DEM}, \;annulus)) + 0.5)
$$
where _annulus_ is an annulus defined by an inner and an outer radius, $r_{inner}$ and $r_{outer}$.
In keeping with the methods of Kane et al.\cite{kane2015} we will use $r_{outer}$ values of 100 m, 250 m, 500 m, 1000 m, and 2000 m. Since the authors do not specify the inner radius used, here we will use the rule that $r_{inner} = \frac{r_{outer}}{2}$.


In [None]:
# read the DEM; rioxarray's reader is open_rasterio (there is no
# rioxarray.from_rasterio) and it returns a (band, y, x) DataArray
dem = rioxarray.open_rasterio(helena_path / 'helena_DEM.tif')
# slope raster of the first (2D) band
slope_agg = slope(dem[0])


def topographic_position_index(dem, outer_radius, res=1):
    '''
    returns topographic position index (TPI): the DEM minus the result of
    ``focal.apply`` over an annulus-shaped neighbourhood (focal.apply's
    default statistic -- the mean, per the xarray-spatial docs)
    using r_inner = r_outer / 2

    dem          : elevation DataArray, either 2D or single-band (1, y, x)
    outer_radius : outer radius of the annulus
    res          : cell size passed to ``annulus_kernel`` for both x and y
                   (presumably in the same units as the radius -- confirm)
    '''
    # focal operations work on a 2D raster; accept the (1, y, x) array that
    # rioxarray returns for a single-band file by taking its only band
    if dem.ndim == 3 and dem.shape[0] == 1:
        dem = dem[0]

    inner_radius = round(outer_radius / 2)
    kernel = focal.annulus_kernel(res, res, outer_radius, inner_radius)
    return dem - focal.apply(dem, kernel)

# TPI for the 100 m neighbourhood; pass the 2D first band, exactly as is
# done for the slope, because the focal operation needs a 2D raster
tpi = topographic_position_index(dem[0], 100)
tpi

In [None]:
# shaded relief of the TPI surface, drawn on top of the shaded relief of the DEM
tpi_terrain = hillshade(tpi)
tpi_terrain_shaded = shade(
    tpi_terrain, cmap=["white", "black"], alpha=255, how="linear"
)
# hillshade needs a 2D raster, so use the first band of the (band, y, x) DEM
illuminated = hillshade(dem[0])
illuminated_shaded = shade(illuminated, cmap=['gray', 'white'], alpha=255, how='linear')
stack(illuminated_shaded, tpi_terrain_shaded)

In [None]:
def slope_position(tpi, slope):
    '''
    Classify every cell into a slope-position class from its TPI and slope.

    Breakpoints are multiples of the standard deviation (sd) of ``tpi``:
        0  unclassified (e.g. NaN tpi, or NaN slope inside the middle band)
        1  ridge          tpi > sd
        2  upper slope    0.5 sd < tpi <= sd
        3  middle slope   -0.5 sd < tpi <= 0.5 sd  and slope > 5
        4  flats          -0.5 sd < tpi <= 0.5 sd  and slope <= 5
        5  lower slope    -sd <= tpi <= -0.5 sd
        6  valleys        tpi < -sd

    tpi   : 2D DataArray with dims ('y', 'x').
    slope : slope raster on the same grid as ``tpi`` (array-like).

    Returns a DataArray named ``slope_pos`` on the grid of ``tpi``.
    '''
    # Existing callers pass the xrspatial `slope` *function* here, and this
    # body used to read the module-level `slope_agg` raster instead of the
    # parameter.  Honour the parameter, but keep those callers working.
    slope_arr = np.asarray(slope_agg if callable(slope) else slope)

    std_dev = float(tpi.std())
    half_std = 0.5 * std_dev
    arr = tpi.to_numpy()
    slope_pos = np.zeros_like(arr)

    # class description breakpoints -- element-wise masks combined with `&`
    # (a chained `a < arr <= b` on an array raises ValueError)
    middle_band = (arr > -half_std) & (arr <= half_std)
    slope_pos[arr > std_dev] = 1  # ridge
    slope_pos[(arr > half_std) & (arr <= std_dev)] = 2  # upper slope
    slope_pos[middle_band & (slope_arr > 5)] = 3  # middle slope
    slope_pos[middle_band & (slope_arr <= 5)] = 4  # flats
    slope_pos[(arr >= -std_dev) & (arr <= -half_std)] = 5  # lower slope
    slope_pos[arr < -std_dev] = 6  # valleys

    # numpy --> dataArray (going through a dataset reuses tpi's coordinates)
    dset = tpi.to_dataset(name='whatever')
    dset['slope_pos'] = (('y', 'x'), slope_pos)
    return dset.slope_pos

# pass the slope raster (slope_agg), not the xrspatial `slope` function
slope_pos = slope_position(tpi, slope_agg)
slope_pos

In [None]:
tpi_dir = helena_path / 'TPI'
# exist_ok so that re-running the cell does not fail on the existing folder
os.makedirs(tpi_dir, exist_ok=True)

for r in [100, 250, 500, 1000, 2000]:
    # calc tpi and slope position; use the 2D first band of the DEM and the
    # slope raster (not the xrspatial `slope` function)
    tpi = topographic_position_index(dem[0], r)
    slope_pos = slope_position(tpi, slope_agg)

    # save as tiffs
    tpi.rio.to_raster(tpi_dir / f'tpi_{r}.tif')
    slope_pos.rio.to_raster(tpi_dir / f'slope_pos_{r}.tif')

    # free a little memory
    del tpi

    # attach slope positions to crowns.  Values are collected in row order and
    # assigned as a whole column: writing to the `row` yielded by iterrows()
    # only changes a temporary copy and never reaches crown_df.
    crown_classes = []
    for geom in crown_df['geometry']:
        crown_class = -999  # no-data value
        try:
            pixels = slope_pos.rio.clip([geom]).to_numpy()
        except rioxarray.exceptions.NoDataInBounds:
            # crown does not cover any raster cell
            pixels = np.empty(0)

        # cells of the clip window outside the crown are masked (NaN);
        # drop them, otherwise the median is NaN and round() raises
        pixels = pixels[np.isfinite(pixels)]
        if pixels.size:
            # median slope-position class of the pixels in the crown
            crown_class = round(np.median(pixels))
        crown_classes.append(crown_class)

    crown_df[f'slope_position_{r}'] = crown_classes



In [None]:
# placeholder: per-radius sampling is not implemented yet
for r in [100, 250, 500, 1000, 2000]:
    ...
    # TODO: calculate TPI for this outer radius
    
    # TODO: make sample stratified by treatment and TPI class