In [72]:
from src.data import gedi_pipeline
from src.data import k_nn
from src.processing.control import placebo
from src.data import clustering

import numpy as np
import importlib
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel. `clustering` is used further down
# the notebook, so it is reloaded together with the other project modules.
# `placebo` stays last so the cell's displayed value is unchanged.
importlib.reload(gedi_pipeline)
importlib.reload(k_nn)
importlib.reload(clustering)
importlib.reload(placebo)

2023-06-01 13:50:12,728 DEBUG: Logger /home/jk871/fire-regen/src/data/gedi_pipeline.py already set up. [in get_logger at /home/jk871/fire-regen/src/utils/logging_util.py:51]


<module 'src.processing.control.placebo' from '/home/jk871/fire-regen/src/processing/control/placebo.py'>

### Test Placebo methods on a smaller set of GEDI shots

In [54]:
# Load the matched GEDI shots from CSV through the project pipeline and
# report how many rows were read.
# NOTE(review): the absolute path is machine-specific.
gedi_gpd = gedi_pipeline.get_gedi_as_gdp(
    '/maps/fire-regen/data/seki_matched_all.csv'
)
print(
    f'Total number of GEDI shots available for the region: {gedi_gpd.shape[0]}'
)

Total number of GEDI shots available for the region: 312314


In [57]:
# Run the loaded shots through gedi_pipeline.process_shots.
# NOTE(review): `gedi_gpd` is rebound to its own processed version, so re-running
# this cell applies process_shots a second time — confirm that is safe.
gedi_gpd = gedi_pipeline.process_shots(gedi_gpd)

In [58]:
# Split the processed shots into a burned and an unburned set. Per the DEBUG
# log emitted by filter_shots, shots on burn boundaries are excluded first.
gedi_burned, gedi_unburned = gedi_pipeline.filter_shots(gedi_gpd)

2023-06-01 13:39:36,841 DEBUG: Excluded shots on the burn boundaries, shots remaining:         282025 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:57]
2023-06-01 13:39:36,854 DEBUG: Number of GEDI shots that burned at least once:                  56387 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:62]
2023-06-01 13:39:36,889 DEBUG: Number of GEDI shots that never burned since 1984:         215157 [in filter_shots at /home/jk871/fire-regen/src/data/gedi_pipeline.py:68]


In [5]:
# Narrow the burned shots for regrowth analysis (the DEBUG log lists the steps:
# shots after fires, burned exactly once, severity categories 2-4, perfect
# raster match).
# NOTE(review): this cell's execution count is older than the filter_shots cell
# above it, and the burned-tree count recorded further down (37728) is larger
# than this filter's final count (17565) — so this filter does not appear to
# have been applied in the recorded run. Re-run the notebook top to bottom.
gedi_burned = gedi_pipeline.filter_shots_for_regrowth_analysis(gedi_burned)

2023-06-01 11:52:06,652 DEBUG: Number of shots that happened after fires:                    37578 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:85]
2023-06-01 11:52:06,666 DEBUG: Number of shots that burned exactly once:                    34411 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:90]
2023-06-01 11:52:06,681 DEBUG: Number of shots that burned in 2-4 categories:                    27777 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:95]
2023-06-01 11:52:06,687 DEBUG: Number of GEDI shots that have a perfect match with burn                    raster (all 2x2 pixels have the same severity):                    17565 [in filter_shots_for_regrowth_analysis at /home/jk871/fire-regen/src/data/gedi_pipeline.py:99]


In [59]:
# Apply gedi_pipeline.filter_for_trees to the burned set and then to the
# unburned set, and report the number of rows kept in each.
gedi_burned_trees, gedi_unburned_trees = (
    gedi_pipeline.filter_for_trees(shots)
    for shots in (gedi_burned, gedi_unburned)
)
print(f'GEDI shots that burned trees: {gedi_burned_trees.shape[0]}')
print(f"GEDI tree shots that didn't burn: {gedi_unburned_trees.shape[0]}")

GEDI shots that burned trees: 37728
GEDI tree shots that didn't burn: 62823


### Debugging of k_nn lib

In [31]:
# Legacy k_nn entry point: called with the 'agbd' column and 200, it returns
# three values, unpacked here as indices, distances and nn_avg_agbd.
(
    nn_indeces,
    nn_distances,
    nn_avg_agbd,
) = k_nn.nearest_neighbors_old(
    gedi_burned_trees,
    gedi_unburned_trees,
    'agbd',
    200,
)

In [32]:
# Current k_nn entry point: same burned/unburned inputs and 200, but it
# returns only two values, unpacked here as indices and distances.
(
    nn_indeces_new,
    nn_distances_new,
) = k_nn.nearest_neighbors(
    gedi_burned_trees,
    gedi_unburned_trees,
    200,
)

In [46]:
# Run placebo.closest_200_mean on the burned and unburned tree shots. The
# result exposes an `nn_agbd` attribute (inspected in the following cell).
result = placebo.closest_200_mean(
    gedi_burned_trees,
    gedi_unburned_trees,
    'agbd',
    'nn_agbd',
)

In [48]:
# Display the `nn_agbd` values produced by placebo.closest_200_mean.
result.nn_agbd

462       214.981030
683       190.635328
901       168.270555
938       175.380558
1062      170.336469
             ...    
767284     98.904641
767479    104.267835
767506    105.929505
767563    105.929505
767909    100.340058
Name: nn_agbd, Length: 11721, dtype: float64

In [49]:
# Display the averages returned by the legacy k_nn.nearest_neighbors_old call,
# for a side-by-side look against result.nn_agbd.
nn_avg_agbd

array([214.98103021, 190.63532797, 168.27055503, ..., 105.92950545,
       105.92950545, 100.34005766])

In [54]:
# Both arguments are the same series, so the recorded 0.0 is what an RMSE of
# a series against itself gives.
# NOTE(review): if the intent was to score the estimate, the second argument
# should presumably be result.nn_agbd — confirm.
placebo.calculate_rmse(result.agbd, result.agbd)

0.0

In [58]:
# Placebo evaluation: the unburned tree shots are passed as both data
# arguments, with placebo.closest_200_mean as the control method.
placebo.evaluate_control(
    gedi_unburned_trees,
    gedi_unburned_trees,
    'agbd',
    'nn_agbd',
    placebo.closest_200_mean,
)

0.708212

In [None]:
# Same placebo evaluation with placebo.closest_200_median as the control method.
# NOTE(review): this cell has no execution count or output in the saved notebook.
placebo.evaluate_control(
    gedi_unburned_trees,
    gedi_unburned_trees,
    'agbd',
    'nn_agbd',
    placebo.closest_200_median,
)

## Evaluate Existing Control Methods

### Nearest Neighbour - no clustering

In [41]:
# 200 closest: neighbour indices and distances among the unburned tree shots.
(
    matched_idx_200,
    matched_distances_200,
) = placebo.get_nearest_neighbors(gedi_unburned_trees, 200)

In [42]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 200-neighbour matches with the mean.
rmse_200_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_200,
    'agbd',
    lambda matches: matches.mean(),
)
print(
    f'Taking the mean of the closest 200 results in the error of: {rmse_200_mean}'
)

Taking the mean of the closest 200 results in the error of: 4.15668


In [43]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 200-neighbour matches with the median.
rmse_200_median = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_200,
    'agbd',
    lambda matches: matches.median(),
)
print(
    f'Taking the median of the closest 200 results in the error of: {rmse_200_median}'
)

Taking the median of the closest 200 results in the error of: 3.356693


In [44]:
# 10 closest: neighbour indices and distances among the unburned tree shots.
(
    matched_idx_10,
    matched_distances_10,
) = placebo.get_nearest_neighbors(gedi_unburned_trees, 10)

In [45]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 10-neighbour matches with the mean.
rmse_10_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_10,
    'agbd',
    lambda matches: matches.mean(),
)
print(
    f'Taking the mean of the closest 10 results in the error of: {rmse_10_mean}'
)

Taking the mean of the closest 10 results in the error of: 4.34879


In [46]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 10-neighbour matches with the median.
rmse_10_median = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_10,
    'agbd',
    lambda matches: matches.median(),
)
print(
    f'Taking the median of the closest 10 results in the error of: {rmse_10_median}'
)

Taking the median of the closest 10 results in the error of: 4.165484


In [48]:
# 1000 closest: neighbour indices and distances among the unburned tree shots.
(
    matched_idx_1000,
    matched_distances_1000,
) = placebo.get_nearest_neighbors(gedi_unburned_trees, 1000)

In [49]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 1000-neighbour matches with the mean.
rmse_1000_mean = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    'agbd',
    lambda matches: matches.mean(),
)
print(
    f'Taking the mean of the closest 1000 results in the error of: {rmse_1000_mean}'
)

Taking the mean of the closest 1000 results in the error of: 4.191309


In [50]:
# RMSE from placebo.calc_rmse_from_matches on 'agbd', aggregating the
# 1000-neighbour matches with the median.
rmse_1000_median = placebo.calc_rmse_from_matches(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    'agbd',
    lambda matches: matches.median(),
)
print(
    f'Taking the median of the closest 1000 results in the error of: {rmse_1000_median}'
)

Taking the median of the closest 1000 results in the error of: 3.31847


In [51]:
# RMSE from placebo.calc_rmse_from_closest_point on 'agbd'.
# NOTE(review): the 1000-neighbour index table is passed in; presumably only
# the nearest entry per shot is used — confirm against the helper.
rmse_closest = placebo.calc_rmse_from_closest_point(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    'agbd',
)
print(
    f'Closest point in the error of: {rmse_closest}'
)

Closest point in the error of: 4.732752


### Algo 2 - Take only points within a certain distance

In [52]:
# Distance-limited variant: the 1000-neighbour indices and distances are passed
# together with a threshold of 200 and a mean aggregator.
rmse_200m_mean = placebo.calc_rmse_from_matches_based_on_distance(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    matched_distances_1000,
    'agbd',
    lambda matches: matches.mean(),
    200,
)
print(
    f'Taking the mean of the points within 200m radius results in the error of: {rmse_200m_mean}'
)

Taking the mean of the points within 200m radius results in the error of: 3.885359


In [53]:
# Distance-limited variant: the 1000-neighbour indices and distances are passed
# together with a threshold of 500 and a median aggregator.
rmse_500m_median = placebo.calc_rmse_from_matches_based_on_distance(
    gedi_unburned_trees,
    gedi_unburned_trees,
    matched_idx_1000,
    matched_distances_1000,
    'agbd',
    lambda matches: matches.median(),
    500,
)
print(
    f'Taking the median of the points within 500m radius results in the error of: {rmse_500m_median}'
)

Taking the median of the points within 500m radius results in the error of: 3.183494


In [9]:
# Scratch: pair each shot's 10 neighbour indices with their distances along a
# new axis 1, so each row of `stack` holds an index row and a distance row.
# The printed rows further down show the indices as floats after stacking.
stack = np.stack([matched_idx_10, matched_distances_10], axis=1)

In [11]:
# Scratch: this only constructs the iterator object (the cell shows its repr);
# nothing is iterated here.
np.ndenumerate(stack)

<numpy.ndenumerate at 0x7f6b9a1578b0>

In [13]:
# Scratch: inspect the (indices, distances) pair of the first few shots only.
# Printing every row of `stack` floods the cell output and bloats the saved
# notebook, so the loop is limited to a short preview.
output = np.empty(stack.shape[0])  # NOTE(review): allocated but never filled or read in this cell
for row in stack[:3]:
    print(row)
    

[[0.00000000e+00 1.00000000e+00 2.00000000e+00 3.00000000e+00
  7.40000000e+01 7.50000000e+01 7.60000000e+01 8.10000000e+01
  8.20000000e+01 8.30000000e+01]
 [0.00000000e+00 5.65361779e+01 1.12769481e+02 9.74615980e+02
  8.31387229e+03 8.67403501e+03 8.71122442e+03 8.73322032e+03
  8.83777454e+03 8.87311421e+03]]
[[1.00000000e+00 2.00000000e+00 0.00000000e+00 3.00000000e+00
  7.40000000e+01 7.50000000e+01 7.60000000e+01 8.10000000e+01
  8.20000000e+01 8.30000000e+01]
 [0.00000000e+00 5.62333040e+01 5.65361779e+01 9.40989370e+02
  8.27922806e+03 8.63712681e+03 8.67410529e+03 8.69886241e+03
  8.80273449e+03 8.83785191e+03]]
[[2.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+00
  7.40000000e+01 7.50000000e+01 7.60000000e+01 8.10000000e+01
  8.20000000e+01 8.30000000e+01]
 [0.00000000e+00 5.62333040e+01 1.12769481e+02 9.09809742e+02
  8.24502074e+03 8.60063857e+03 8.63740453e+03 8.66493055e+03
  8.76811595e+03 8.80300961e+03]]
[[3.00000000e+00 2.00000000e+00 1.00000000e+00 0.000000

In [None]:
# Scratch: print every 1-D slice of `stack` taken along axis 1.
# NOTE(review): this cell has no execution count or output in the saved notebook.
np.apply_along_axis(print, 1, stack)

In [116]:
# Neighbour indices of the first shot whose distance is below 250
# (boolean-mask indexing selects the same entries as np.where(...)[0]).
matched_idx_10[0][matched_distances_10[0] < 250]

array([   0,    1,    2, 7161, 8297, 8296])

In [114]:
# Scratch: pick positions 1 and 2 of the first shot's neighbour indices.
matched_idx_10[0][[1, 2]]

array([1, 2])

In [117]:
# Scratch: positions (as a tuple of index arrays) where the first shot's
# neighbour distance is below 250.
np.nonzero(matched_distances_10[0] < 250)

(array([0, 1, 2, 3, 4, 5]),)

In [118]:
# Display the 10 neighbour indices recorded for the first shot.
matched_idx_10[0]

array([    0,     1,     2,  7161,  8297,  8296,  8295, 61543, 61530,
        8293])

### Evaluate clustering

In [74]:
# Cluster the unburned tree shots on the four terrain columns; the trailing 5
# is presumably the number of clusters — confirm against clustering.cluster.
# NOTE(review): the call also emits thread-pool diagnostics (see cell output).
terrain_all_clustered_in_5 = clustering.cluster(
    gedi_unburned_trees,
    ['elevation', 'aspect', 'slope', 'soil'],
    5,
)

[{'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-15028c96.3.21.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.21'},
 {'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-41284840.3.18.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.18'},
 {'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0',
  'internal_api': 'openmp',
  'num_threads': 1,
  'prefix': 'libgomp',
  'user_api': 'openmp',
  'version': None}]




In [75]:
# Score the 5-cluster terrain grouping with a median aggregator and a final
# argument of 100 (its meaning is defined by placebo.find_closest_in_cluster).
placebo.find_closest_in_cluster(
    terrain_all_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    100,
)

3.376876

In [76]:
# Same 5-cluster terrain grouping and median aggregator, final argument 200.
placebo.find_closest_in_cluster(
    terrain_all_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    200,
)

3.247833

In [77]:
# Cluster the unburned tree shots on the four terrain columns; the trailing 10
# is presumably the number of clusters — confirm against clustering.cluster.
# NOTE(review): the call also emits thread-pool diagnostics (see cell output).
terrain_all_clustered_in_10 = clustering.cluster(
    gedi_unburned_trees,
    ['elevation', 'aspect', 'slope', 'soil'],
    10,
)

[{'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-15028c96.3.21.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.21'},
 {'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-41284840.3.18.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.18'},
 {'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0',
  'internal_api': 'openmp',
  'num_threads': 1,
  'prefix': 'libgomp',
  'user_api': 'openmp',
  'version': None}]




In [78]:
# Score the 10-cluster terrain grouping with a median aggregator, final
# argument 200.
placebo.find_closest_in_cluster(
    terrain_all_clustered_in_10,
    "agbd",
    "nn_agbd",
    10,
    lambda matches: matches.median(),
    200,
)

3.343398

In [79]:
# Cluster the unburned tree shots on elevation and slope only; the trailing 5
# is presumably the number of clusters — confirm against clustering.cluster.
# NOTE(review): the call also emits thread-pool diagnostics (see cell output).
slope_elev_clustered_in_5 = clustering.cluster(
    gedi_unburned_trees,
    ['elevation', 'slope'],
    5,
)

[{'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/numpy.libs/libopenblas64_p-r0-15028c96.3.21.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.21'},
 {'architecture': 'Zen',
  'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scipy.libs/libopenblasp-r0-41284840.3.18.so',
  'internal_api': 'openblas',
  'num_threads': 1,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.18'},
 {'filepath': '/home/jk871/fire-regen/fire-regen-env/lib/python3.10/site-packages/scikit_learn.libs/libgomp-a34b3233.so.1.0.0',
  'internal_api': 'openmp',
  'num_threads': 1,
  'prefix': 'libgomp',
  'user_api': 'openmp',
  'version': None}]




In [80]:
# Score the elevation/slope 5-cluster grouping with a median aggregator, final
# argument 200.
placebo.find_closest_in_cluster(
    slope_elev_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    200,
)

3.226492

In [81]:
# Elevation/slope 5-cluster grouping, median aggregator, final argument 50.
placebo.find_closest_in_cluster(
    slope_elev_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    50,
)

3.338938

In [82]:
# Elevation/slope 5-cluster grouping, mean aggregator, final argument 200.
placebo.find_closest_in_cluster(
    slope_elev_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.mean(),
    200,
)

4.148457

In [83]:
# Elevation/slope 5-cluster grouping, median aggregator, final argument 500.
placebo.find_closest_in_cluster(
    slope_elev_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    500,
)

3.170006

In [84]:
# Elevation/slope 5-cluster grouping, median aggregator, final argument 1000.
placebo.find_closest_in_cluster(
    slope_elev_clustered_in_5,
    "agbd",
    "nn_agbd",
    5,
    lambda matches: matches.median(),
    1000,
)

3.137952