In [121]:
from src.data import gedi_pipeline
from src.data import k_nn
from src.processing.control import placebo

import numpy as np
import importlib
# Re-import the project modules so that edits made to them on disk are picked up
# by the running kernel. The last reload's return value is what the cell displays.
importlib.reload(gedi_pipeline)
importlib.reload(k_nn)
importlib.reload(placebo)

2023-06-01 11:27:45,577 DEBUG: Logger /home/jk871/fire-regen/src/data/gedi_pipeline.py already set up. [in get_logger at /home/jk871/fire-regen/src/utils/logging_util.py:51]


<module 'src.processing.control.placebo' from '/home/jk871/fire-regen/src/processing/control/placebo.py'>

### Test Placebo methods on a smaller set of GEDI shots

In [18]:
# Read the matched GEDI shots from CSV and report how many rows were loaded.
gedi_gpd = gedi_pipeline.get_gedi_as_gdp(
    '/maps/fire-regen/data/seki_matched_all.csv'
)
print(f'Total number of GEDI shots available for the region: {gedi_gpd.shape[0]}')

Total number of GEDI shots available for the region: 312314


In [19]:
# Run the pipeline's shot-processing step on the loaded shots.
# NOTE(review): gedi_gpd is rebound to the processed result, so re-running this cell
# feeds already-processed data back in — confirm process_shots tolerates that, or
# re-run the load cell first.
gedi_gpd = gedi_pipeline.process_shots(gedi_gpd)

In [20]:
# Split the processed shots into a burned set and an unburned set; the DEBUG log
# emitted by filter_shots reports the counts at each step.
gedi_burned, gedi_unburned = gedi_pipeline.filter_shots(gedi_gpd)

2023-06-01 09:35:29,718 DEBUG: Excluded shots on the burn boundaries, shots remaining:         282025 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:57]
2023-06-01 09:35:29,730 DEBUG: Number of GEDI shots that burned at least once:                  56387 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:62]
2023-06-01 09:35:29,756 DEBUG: Number of GEDI shots that never burned since 1984:         215157 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:68]


In [21]:
# Narrow the burned shots to those kept for regrowth analysis; the DEBUG log lists
# the successive filters and the number of shots remaining after each.
# NOTE(review): gedi_burned is overwritten, so re-running this cell filters the
# already-filtered set rather than the original burned shots.
gedi_burned = gedi_pipeline.filter_shots_for_regrowth_analysis(gedi_burned)

2023-06-01 09:35:54,172 DEBUG: Number of shots that happened after fires:                    37578 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:85]
2023-06-01 09:35:54,187 DEBUG: Number of shots that burned exactly once:                    34411 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:90]
2023-06-01 09:35:54,203 DEBUG: Number of shots that burned in 2-4 categories:                    27777 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:95]
2023-06-01 09:35:54,211 DEBUG: Number of GEDI shots that have a perfect match with burn                    raster (all 2x2 pixels have the same severity):                    17565 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:99]


In [22]:
# Apply the tree filter to the burned and unburned sets separately, then print
# how many shots remain in each.
gedi_burned_trees = gedi_pipeline.filter_for_trees(gedi_burned)
gedi_unburned_trees = gedi_pipeline.filter_for_trees(gedi_unburned)
print(f'GEDI shots that burned trees: {gedi_burned_trees.shape[0]}')
print(f'GEDI tree shots that didn\'t burn: {gedi_unburned_trees.shape[0]}')

GEDI shots that burned trees: 11721
GEDI tree shots that didn't burn: 62823


### Debugging of k_nn lib

In [31]:
# Reference run of the older k-NN routine, which also returns a per-shot 'agbd'
# aggregate as its third value (presumably the mean over 200 neighbours — TODO confirm).
nn_indeces, nn_distances, nn_avg_agbd = k_nn.nearest_neighbors_old(gedi_burned_trees, gedi_unburned_trees, 'agbd', 200)

In [32]:
# Same inputs through the newer k-NN routine, which returns only indices and distances.
nn_indeces_new, nn_distances_new = k_nn.nearest_neighbors(gedi_burned_trees, gedi_unburned_trees, 200)

In [46]:
# Run the closest_200_mean control on burned vs. unburned tree shots.
# Presumably the estimate is written to the 'nn_agbd' column of the result — TODO confirm.
result = placebo.closest_200_mean(gedi_burned_trees, gedi_unburned_trees, 'agbd', 'nn_agbd')

In [48]:
# Display the control estimates produced by closest_200_mean.
result.nn_agbd

462       214.981030
683       190.635328
901       168.270555
938       175.380558
1062      170.336469
             ...    
767284     98.904641
767479    104.267835
767506    105.929505
767563    105.929505
767909    100.340058
Name: nn_agbd, Length: 11721, dtype: float64

In [49]:
# Third return value of nearest_neighbors_old, shown to compare against result.nn_agbd.
nn_avg_agbd

array([214.98103021, 190.63532797, 168.27055503, ..., 105.92950545,
       105.92950545, 100.34005766])

In [54]:
# Sanity check of calculate_rmse: both arguments are the same column, so an error
# of zero is the expected outcome.
# NOTE(review): if the intent was to score the control estimate, the second
# argument should presumably be result.nn_agbd — confirm.
placebo.calculate_rmse(result.agbd, result.agbd)

0.0

In [58]:
# Placebo evaluation: the same unburned tree shots are passed as both data
# arguments and scored with the closest_200_mean estimator.
placebo.evaluate_control(
    gedi_unburned_trees,
    gedi_unburned_trees,
    'agbd',
    'nn_agbd',
    placebo.closest_200_mean,
)

0.708212

In [None]:
# Placebo evaluation: the same unburned tree shots are passed as both data
# arguments and scored with the closest_200_median estimator.
placebo.evaluate_control(
    gedi_unburned_trees,
    gedi_unburned_trees,
    'agbd',
    'nn_agbd',
    placebo.closest_200_median,
)

## Evaluate Existing Control Methods

### Nearest Neighbour - no clustering

In [93]:
# 200 closest: match the unburned tree shots against themselves, requesting 200
# neighbours per shot; returns the neighbour indices and their distances.
matched_idx_200, matched_distances_200 = k_nn.nearest_neighbors(gedi_unburned_trees, gedi_unburned_trees, 200)

In [66]:
# Error when each shot's 'agbd' estimate is the mean over its 200 matches.
rmse_200_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_200,
    'agbd',
    lambda matches: matches.mean(),
)
print(f'Taking the mean of the closest 200 results in the error of: {rmse_200_mean}')

Taking the mean of the closest 200 results in the error of: 4.135897


In [67]:
# Error when each shot's 'agbd' estimate is the median over its 200 matches.
rmse_200_median = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_200,
    'agbd',
    lambda matches: matches.median(),
)
print(f'Taking the median of the closest 200 results in the error of: {rmse_200_median}')

Taking the median of the closest 200 results in the error of: 3.339611


In [90]:
# 10 closest: match the unburned tree shots against themselves, requesting 10
# neighbours per shot; returns the neighbour indices and their distances.
matched_idx_10, matched_distances_10 = k_nn.nearest_neighbors(gedi_unburned_trees, gedi_unburned_trees, 10)

In [69]:
# Error when each shot's 'agbd' estimate is the mean over its 10 matches.
rmse_10_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_10,
    'agbd',
    lambda matches: matches.mean(),
)
print(f'Taking the mean of the closest 10 results in the error of: {rmse_10_mean}')

Taking the mean of the closest 10 results in the error of: 3.913911


In [87]:
# Error when each shot's 'agbd' estimate is the median over its 10 matches.
rmse_10_median = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_10,
    'agbd',
    lambda matches: matches.median(),
)
print(f'Taking the median of the closest 10 results in the error of: {rmse_10_median}')

Taking the median of the closest 10 results in the error of: 3.654584


In [88]:
# 1000 closest: match the unburned tree shots against themselves, requesting 1000
# neighbours per shot; returns the neighbour indices and their distances.
matched_idx_1000, matched_distances_1000 = k_nn.nearest_neighbors(gedi_unburned_trees, gedi_unburned_trees, 1000)

In [89]:
# Error when each shot's 'agbd' estimate is the mean over its 1000 matches.
rmse_1000_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    'agbd',
    lambda matches: matches.mean(),
)
print(f'Taking the mean of the closest 1000 results in the error of: {rmse_1000_mean}')

Taking the mean of the closest 1000 results in the error of: 4.187118


### Algo 2 - Take only points within a certain distance

In [120]:
# NOTE(review): this repeats the 1000-neighbour match from the "Nearest Neighbour"
# section with the same arguments and rebinds the same two names; if those
# variables are still in the kernel, the recomputation is redundant.
matched_idx_1000, matched_distances_1000 = k_nn.nearest_neighbors(gedi_unburned_trees, gedi_unburned_trees, 1000)

In [122]:
# Distance-based variant: in addition to the matches, passes the neighbour
# distances and a threshold of 200, aggregating with the mean.
# NOTE(review): as recorded in this cell's output, the call currently fails with
# "IndexError: invalid index to scalar variable." — the root cause is not visible
# from this notebook and the print below is never reached.
rmse_200m_mean = placebo.calc_rmse_from_matches_based_on_distance(gedi_unburned_trees, gedi_unburned_trees, matched_idx_1000, matched_distances_1000, 'agbd', lambda x: x.mean(), 200)
print(f'Taking the mean of the points within 200m radius results in the error of: {rmse_200m_mean}')

IndexError: invalid index to scalar variable.

In [96]:
# Largest distance in the first row of the 1000-neighbour distance array.
matched_distances_1000[0].max()

7099.520938892842

In [123]:
# Combine the 10-neighbour indices and distances into one array, with the new
# axis inserted at position 1 (index/distance).
# NOTE(review): np.stack yields a single-dtype array, so integer indices would be
# upcast to float alongside the distances — cast back before indexing with them.
stack = np.stack([matched_idx_10, matched_distances_10], axis=1)

In [130]:
# Check the layout of the stacked array: (rows, 2, neighbours per row).
stack.shape

(62823, 2, 10)

In [None]:
# Debug aid: print the [neighbour index, distance] pair for each neighbour of the
# first few rows only. Applying this to the whole stack would emit one line per
# (row, neighbour) combination and flood the notebook output, so the input is
# sliced to a small preview. `print` is passed directly instead of being wrapped
# in a lambda; the trailing semicolon suppresses the array of None return values.
PREVIEW_ROWS = 3
np.apply_along_axis(print, 1, stack[:PREVIEW_ROWS]);

In [116]:
# Neighbour indices in the first row whose distance is below 250, selected with a
# boolean mask over that row's distances.
matched_idx_10[0][matched_distances_10[0] < 250]

array([   0,    1,    2, 7161, 8297, 8296])

In [114]:
# Check that indexing a row with an integer array picks out those positions.
matched_idx_10[0][np.array([1,2])]

array([1, 2])

In [117]:
# Positions within the first row (not shot indices) whose distance is below 250;
# np.where returns them as a one-element tuple of arrays.
np.where(matched_distances_10[0] < 250)

(array([0, 1, 2, 3, 4, 5]),)

In [118]:
# Full neighbour-index row for the first entry, to compare with the filtered selection.
matched_idx_10[0]

array([    0,     1,     2,  7161,  8297,  8296,  8295, 61543, 61530,
        8293])