# Extract Forest Structure


In [1]:
# Libraries and Functions
import random as rnd
import sys

import pandas as pd

# Seed Python's random module so the random group draws below are reproducible.
rnd.seed(123)

# Make the project-local modules in src/ importable (path is relative to the
# notebook's working directory).
sys.path.insert(0, "../../src")
from utilities import *
from run_mp import *

# Imported explicitly because it is passed by name to run_mp below.
from utilities import aggregate_tree_info_to_site_level
from pyprojroot import here

## User Input


In [3]:
# Names of the aggregation functions handed to
# aggregate_tree_info_to_site_level as `fcts_to_apply`.
functions_to_apply = [
    "mean",
    "std",
    "median",
    "max",
    "min",
    "sum",
    "range",
    "iqr",
]

# Tree-level variables handed to aggregate_tree_info_to_site_level as `vars_in`.
variables_to_extract = [
    "htot",
    "age13",
    "ir5",
    "v",
    "ba_1",
    "ba_2",
    "ba_change_perc_yr",
]

# Species variable handed to aggregate_tree_info_to_site_level as `species_var`.
# ! Note: Only "espar_red" is supported right now; other species variables are
# ! not accounted for yet.
species_variable_to_use = "espar_red"

# Spatial level whose column is used as the grouping id.
# ! Note: Only "idp" is supported right now. The other levels (reg, ser, gre,
# ! dep, hex) still need code that aggregates per year instead of over the
# ! entire region.
spatial_level_at_which_to_extract = "idp"

# Fail fast on unsupported settings so the long multiprocessing run below is
# never started with a configuration that is known not to work.
if species_variable_to_use != "espar_red":
    raise ValueError(
        f"species_variable_to_use={species_variable_to_use!r} is not supported yet; "
        "use 'espar_red'."
    )
if spatial_level_at_which_to_extract != "idp":
    raise ValueError(
        f"spatial_level_at_which_to_extract={spatial_level_at_which_to_extract!r} "
        "is not supported yet; use 'idp'."
    )

## Load NFI Data


In [3]:
# Read the analysis-ready NFI table and report how many rows (trees) and how
# many distinct idp values (sites) it holds.
nfi_raw = pd.read_feather(here("data/tmp/nfi/nfi_ready_for_analysis.feather"))
print("Number of trees: ", nfi_raw.shape[0])
print("Number of sites: ", nfi_raw["idp"].nunique(dropna=False))

Number of trees:  549255
Number of sites:  40231


## Extract Forest Structure


In [4]:
# Mirror the configured spatial level into a generic "group_id" column, then
# let the project helper turn nfi_raw into a list split on that column
# (presumably one DataFrame per group -- see utilities).
nfi_raw["group_id"] = nfi_raw.loc[:, spatial_level_at_which_to_extract]
df_list = split_df_into_list_of_group_or_ns(nfi_raw, "group_id")

In [5]:
# Smoke test: run the aggregation on a single, randomly drawn group
example_group = rnd.choice(df_list)
aggregate_tree_info_to_site_level(
    df_in=example_group,
    vars_in=variables_to_extract,
    fcts_to_apply=functions_to_apply,
    species_var=species_variable_to_use,
)

Unnamed: 0,group_id,n_species_espar_red,n_species_species_lat,n_species_genus_lat,allspecies_htot_mean,allspecies_age13_mean,allspecies_ir5_mean,allspecies_v_mean,allspecies_ba_1_mean,allspecies_ba_2_mean,...,trees_survived_with_mistl_at_v1_in_perc,trees_survived_with_frost_at_v1_in_perc,trees_survived_with_firrust_at_v1_in_perc,trees_survived_with_branchdmg_at_v1_in_perc,trees_survived_with_anydmg_at_v1_in_perc,trees_survived_with_frost_at_v2_in_perc,trees_survived_with_mistl_at_v2_in_perc,trees_survived_with_firrust_at_v2_in_perc,trees_survived_with_branchdmg_at_v2_in_perc,trees_survived_with_anydmg_at_v2_in_perc
0,545208,2,2,2,11.428571,39.5,0.012571,0.124844,1.187383,1.935184,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Try for 20 random groups with multiprocessing.
# min() caps the sample size at the number of available groups, because
# rnd.sample raises ValueError when asked for more items than df_list holds.
n_random = min(20, len(df_list))

# aggregate_tree_info_to_site_level comes from the import cell at the top of
# the notebook; it is not re-imported here.
run_mp(
    aggregate_tree_info_to_site_level,
    rnd.sample(df_list, n_random),
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    vars_in=variables_to_extract,
    fcts_to_apply=functions_to_apply,
    species_var=species_variable_to_use,
)

100%|██████████| 20/20 [00:06<00:00,  2.99it/s]


Unnamed: 0,group_id,n_species_espar_red,n_species_species_lat,n_species_genus_lat,allspecies_htot_mean,allspecies_age13_mean,allspecies_ir5_mean,allspecies_v_mean,allspecies_ba_1_mean,allspecies_ba_2_mean,...,trees_survived_with_mistl_at_v1_in_perc,trees_survived_with_frost_at_v1_in_perc,trees_survived_with_firrust_at_v1_in_perc,trees_survived_with_branchdmg_at_v1_in_perc,trees_survived_with_anydmg_at_v1_in_perc,trees_survived_with_frost_at_v2_in_perc,trees_survived_with_mistl_at_v2_in_perc,trees_survived_with_firrust_at_v2_in_perc,trees_survived_with_branchdmg_at_v2_in_perc,trees_survived_with_anydmg_at_v2_in_perc
0,765168,2,2,2,35.606667,55.0,0.01242,3.087042,10.899942,13.29437,...,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
0,575850,1,1,1,27.154545,108.0,0.009209,1.342657,5.75403,6.34822,...,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.181818,0.181818
0,932139,2,2,2,10.2,42.0,0.00675,0.032147,0.445332,0.602673,...,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0
0,764134,5,5,4,14.52,85.0,0.007747,0.587515,3.660182,3.967342,...,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0
0,609270,1,1,1,25.4,112.0,0.0218,1.731268,8.997726,10.505386,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,532525,1,1,1,14.033333,59.0,0.008267,0.572422,4.514332,4.872063,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,904847,2,2,2,12.366667,31.5,0.026967,0.380965,3.677824,5.120103,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1100240,1,1,1,23.933333,54.5,0.014833,1.111486,5.41362,6.026794,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1110959,3,3,2,17.4125,137.0,0.00718,0.862081,3.778632,4.39964,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,842963,1,1,1,18.0875,20.0,0.010825,0.171962,1.124405,1.636074,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Run multiprocess for all sites.
# Long-running: the recorded run over ~40k sites took just under 2 h
# (1:55:15) with num_cores=10 for all metrics.
# aggregate_tree_info_to_site_level comes from the import cell at the top of
# the notebook; it is not re-imported here.
df_mp = run_mp(
    aggregate_tree_info_to_site_level,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    vars_in=variables_to_extract,
    fcts_to_apply=functions_to_apply,
    species_var=species_variable_to_use,
)

100%|██████████| 40231/40231 [1:55:15<00:00,  5.82it/s]  


In [6]:
# Tidy the combined result: drop the old index in favour of a fresh one and
# rename the generic grouping column back to the configured spatial level.
df_mp = df_mp.reset_index(drop=True).rename(
    columns={"group_id": spatial_level_at_which_to_extract}
)

# Write the site-level table to the predictor dataset folder
forest_structure_path = here(
    f"data/final/predictor_datasets/forest_structure-{spatial_level_at_which_to_extract}.feather"
)
df_mp.to_feather(forest_structure_path)

In [5]:
# Load the saved site-level table from disk and display it
forest_structure_path = here(
    f"data/final/predictor_datasets/forest_structure-{spatial_level_at_which_to_extract}.feather"
)
df_mp = pd.read_feather(forest_structure_path)
df_mp

Unnamed: 0,idp,n_species_espar_red,n_species_species_lat,n_species_genus_lat,allspecies_htot_mean,allspecies_age13_mean,allspecies_ir5_mean,allspecies_v_mean,allspecies_ba_1_mean,allspecies_ba_2_mean,...,trees_survived_with_mistl_at_v1_in_perc,trees_survived_with_frost_at_v1_in_perc,trees_survived_with_firrust_at_v1_in_perc,trees_survived_with_branchdmg_at_v1_in_perc,trees_survived_with_anydmg_at_v1_in_perc,trees_survived_with_frost_at_v2_in_perc,trees_survived_with_mistl_at_v2_in_perc,trees_survived_with_firrust_at_v2_in_perc,trees_survived_with_branchdmg_at_v2_in_perc,trees_survived_with_anydmg_at_v2_in_perc
0,500002,2,2,2,24.220000,127.5,0.007190,1.887303,8.808094,9.478110,...,0.0,0.200000,0.0,0.0,0.200000,0.1,0.0,0.0,0.0,0.1
1,500008,2,2,2,9.172727,17.0,0.013175,0.032720,0.372001,0.613297,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,500012,2,2,2,17.754545,94.5,0.007955,0.545947,3.207883,3.576881,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,500013,5,5,5,16.400000,87.0,0.007075,0.419032,2.355131,2.747332,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,500042,1,1,1,20.900000,101.5,0.011220,1.698988,9.825804,10.649829,...,0.0,0.000000,0.0,0.0,0.000000,0.2,0.0,0.0,0.0,0.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40226,1131409,2,2,2,19.833333,111.0,0.009667,1.575676,7.385535,8.239828,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
40227,1131410,5,5,5,10.762500,48.0,0.005500,0.223353,1.832734,1.927942,...,0.0,0.000000,0.0,0.0,0.250000,0.0,0.0,0.0,0.0,0.0
40228,1131419,2,2,2,9.716667,12.5,0.022025,0.036744,0.500881,0.834112,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
40229,1131424,1,1,1,23.550000,47.5,0.015050,0.805833,4.465785,5.100444,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


### Using subset of trees


In [None]:
# TODO: Not started yet. Describing the structure of only a subset of trees seems of limited use to me, but I may be wrong -- revisit if it turns out to be needed.