# Spatial Patterns of Change


## Imports and Functions


In [1]:
# Data wrangling
import pandas as pd
import numpy as np
import geopandas as gpd
import h3pandas

# Data visualisation
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pylab import f

# My functions
import sys

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *
from random_forest_utils import *

# Other
from os import error
import warnings
import chime
from pyprojroot import here

chime.theme("mario")

# Magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

## User Input


In [18]:
force_rerunning_all_sites = False
run_only_subset = True
subset_fraction = 0.10

## Load Data


In [3]:
# Load all shapefiles
shp_cty = gpd.read_file(here("data/raw/maps/france_geojson/cty.geojson"))
shp_reg = gpd.read_file(here("data/raw/maps/france_geojson/reg.geojson"))
shp_dep = gpd.read_file(here("data/raw/maps/france_geojson/dep.geojson"))
shp_gre = gpd.read_file(here("data/raw/maps/france_geojson/gre.geojson"))
shp_ser = gpd.read_file(here("data/raw/maps/france_geojson/ser.geojson"))
shp_hex = load_hexmap()

In [4]:
# Load NFI data
nfi_raw = pd.read_feather(here("data/tmp/nfi/nfi_ready_for_analysis.feather"))
nfi_raw.shape

(549255, 193)

## Calculate Change at IDP-Level for All Subsets


In [16]:
# * Get input df --------------------------------------------------------------
df_in = nfi_raw.copy()
if run_only_subset:
    all_sites = df_in["idp"].unique().tolist()
    subset_sites = np.random.choice(
        all_sites, int(len(all_sites) * subset_fraction), replace=False
    ).tolist()
    df_in = df_in.query("idp in @subset_sites")

# * Get factorial setup -------------------------------------------------------
# ! Pick only idp level here but loop over all species and heights
all_regions = ["idp"]  # ["idp", "reg", "dep", "gre", "ser", "hex"]

# Pick top 10 species + populus
all_species = (
    df_in["genus_lat"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
all_species = ["all"] + all_species
if "Populus" not in all_species:
    all_species.append("Populus")

# Pick all height classes
all_heights = ["all"] + df_in["tree_height_class"].value_counts().index.tolist()
all_heights.remove("Missing")

# * Run loop ------------------------------------------------------------------
# Imports
from utilities import calculate_growth_mortality_optimized
from run_mp import run_mp

# Loop over species
for my_species in all_species:
    # Loop over heights
    for my_height in all_heights:
        # Get new df
        df_loop = df_in.copy()

        # ! Filter df
        # Filter for species
        if my_species != "all":
            df_loop = df_loop.query(f"genus_lat == '{my_species}'")

        # Filter for height
        if my_height != "all":
            df_loop = df_loop.query(f"tree_height_class == '{my_height}'")

        # Loop over regions
        for my_region in all_regions:
            # ! Get filename and print it
            if run_only_subset:
                my_dir = here(
                    str.lower(
                        f"data/tmp/nfi/spatio-temporal-analysis/idp/subset_{round(subset_fraction*100)}%_of_sites"
                    )
                )
            else:
                my_dir = here(str.lower(f"data/tmp/nfi/spatio-temporal-analysis/idp/"))
            os.makedirs(my_dir, exist_ok=True)
            my_file = str.lower(f"species_{my_species}-height_{my_height}")
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            # ! Add grouping variable
            my_region_tmp = my_region

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            if my_region != "idp":
                my_region = my_region + "_year"
                df_loop[my_region] = (
                    df_loop[my_region_tmp].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # ! Create list and run mp
            df_list = [
                group for name, group in df_loop.groupby(my_region, as_index=False)
            ]
            df_mp = run_mp(
                calculate_growth_mortality_optimized,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                grouping_variable=my_region,
            )

            # ! Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)


 - working on: species_all-height_all


100%|██████████| 4023/4023 [09:36<00:00,  6.98it/s]



 - working on: species_all-height_10-15


100%|██████████| 2620/2620 [06:25<00:00,  6.79it/s]



 - working on: species_all-height_0-10


100%|██████████| 2567/2567 [06:36<00:00,  6.47it/s]



 - working on: species_all-height_15-20


100%|██████████| 2370/2370 [06:09<00:00,  6.41it/s]



 - working on: species_all-height_20-25


100%|██████████| 2000/2000 [05:07<00:00,  6.50it/s]



 - working on: species_all-height_25+


100%|██████████| 1344/1344 [03:25<00:00,  6.54it/s]



 - working on: species_quercus-height_all


100%|██████████| 2320/2320 [05:43<00:00,  6.76it/s]



 - working on: species_quercus-height_10-15


100%|██████████| 783/783 [01:56<00:00,  6.74it/s]



 - working on: species_quercus-height_0-10


100%|██████████| 803/803 [01:58<00:00,  6.76it/s]



 - working on: species_quercus-height_15-20


100%|██████████| 850/850 [02:06<00:00,  6.71it/s]



 - working on: species_quercus-height_20-25


100%|██████████| 867/867 [02:09<00:00,  6.69it/s]



 - working on: species_quercus-height_25+


100%|██████████| 543/543 [01:21<00:00,  6.67it/s]



 - working on: species_pinus-height_all


100%|██████████| 831/831 [02:03<00:00,  6.71it/s]



 - working on: species_pinus-height_10-15


100%|██████████| 389/389 [00:57<00:00,  6.80it/s]



 - working on: species_pinus-height_0-10


100%|██████████| 352/352 [00:53<00:00,  6.55it/s]



 - working on: species_pinus-height_15-20


100%|██████████| 338/338 [00:51<00:00,  6.52it/s]



 - working on: species_pinus-height_20-25


100%|██████████| 256/256 [00:40<00:00,  6.38it/s]



 - working on: species_pinus-height_25+


100%|██████████| 108/108 [00:18<00:00,  5.88it/s]



 - working on: species_fagus-height_all


100%|██████████| 845/845 [02:06<00:00,  6.70it/s]



 - working on: species_fagus-height_10-15


100%|██████████| 322/322 [00:49<00:00,  6.54it/s]



 - working on: species_fagus-height_0-10


100%|██████████| 249/249 [00:38<00:00,  6.42it/s]



 - working on: species_fagus-height_15-20


100%|██████████| 320/320 [00:49<00:00,  6.45it/s]



 - working on: species_fagus-height_20-25


100%|██████████| 322/322 [00:49<00:00,  6.53it/s]



 - working on: species_fagus-height_25+


100%|██████████| 307/307 [00:47<00:00,  6.43it/s]



 - working on: species_carpinus-height_all


100%|██████████| 784/784 [01:54<00:00,  6.84it/s]



 - working on: species_carpinus-height_10-15


100%|██████████| 456/456 [01:08<00:00,  6.69it/s]



 - working on: species_carpinus-height_0-10


100%|██████████| 257/257 [00:39<00:00,  6.48it/s]



 - working on: species_carpinus-height_15-20


100%|██████████| 341/341 [00:51<00:00,  6.59it/s]



 - working on: species_carpinus-height_20-25


100%|██████████| 194/194 [00:30<00:00,  6.30it/s]



 - working on: species_carpinus-height_25+


100%|██████████| 53/53 [00:10<00:00,  5.09it/s]



 - working on: species_castanea-height_all


100%|██████████| 507/507 [01:16<00:00,  6.62it/s]



 - working on: species_castanea-height_10-15


100%|██████████| 274/274 [00:42<00:00,  6.51it/s]



 - working on: species_castanea-height_0-10


100%|██████████| 213/213 [00:33<00:00,  6.32it/s]



 - working on: species_castanea-height_15-20


100%|██████████| 241/241 [00:37<00:00,  6.37it/s]



 - working on: species_castanea-height_20-25


100%|██████████| 122/122 [00:20<00:00,  5.93it/s]



 - working on: species_castanea-height_25+


100%|██████████| 35/35 [00:08<00:00,  4.33it/s]



 - working on: species_picea-height_all


100%|██████████| 394/394 [01:00<00:00,  6.55it/s]



 - working on: species_picea-height_10-15


100%|██████████| 107/107 [00:18<00:00,  5.82it/s]



 - working on: species_picea-height_0-10


100%|██████████| 145/145 [00:23<00:00,  6.09it/s]



 - working on: species_picea-height_15-20


100%|██████████| 159/159 [00:25<00:00,  6.30it/s]



 - working on: species_picea-height_20-25


100%|██████████| 181/181 [00:28<00:00,  6.43it/s]



 - working on: species_picea-height_25+


100%|██████████| 178/178 [00:28<00:00,  6.29it/s]



 - working on: species_abies-height_all


100%|██████████| 343/343 [00:52<00:00,  6.58it/s]



 - working on: species_abies-height_10-15


100%|██████████| 143/143 [00:23<00:00,  6.02it/s]



 - working on: species_abies-height_0-10


100%|██████████| 171/171 [00:27<00:00,  6.15it/s]



 - working on: species_abies-height_15-20


100%|██████████| 155/155 [00:25<00:00,  6.04it/s]



 - working on: species_abies-height_20-25


100%|██████████| 166/166 [00:26<00:00,  6.19it/s]



 - working on: species_abies-height_25+


100%|██████████| 152/152 [00:25<00:00,  5.98it/s]



 - working on: species_fraxinus-height_all


100%|██████████| 539/539 [01:27<00:00,  6.19it/s]



 - working on: species_fraxinus-height_10-15


100%|██████████| 232/232 [00:41<00:00,  5.64it/s]



 - working on: species_fraxinus-height_0-10


100%|██████████| 101/101 [00:18<00:00,  5.53it/s]



 - working on: species_fraxinus-height_15-20


100%|██████████| 198/198 [00:32<00:00,  6.12it/s]



 - working on: species_fraxinus-height_20-25


100%|██████████| 167/167 [00:27<00:00,  6.03it/s]



 - working on: species_fraxinus-height_25+


100%|██████████| 145/145 [00:24<00:00,  5.88it/s]



 - working on: species_acer-height_all


100%|██████████| 541/541 [01:24<00:00,  6.42it/s]



 - working on: species_acer-height_10-15


100%|██████████| 244/244 [00:39<00:00,  6.17it/s]



 - working on: species_acer-height_0-10


100%|██████████| 198/198 [00:32<00:00,  6.11it/s]



 - working on: species_acer-height_15-20


100%|██████████| 179/179 [00:28<00:00,  6.30it/s]



 - working on: species_acer-height_20-25


100%|██████████| 102/102 [00:17<00:00,  5.85it/s]



 - working on: species_acer-height_25+


100%|██████████| 44/44 [00:09<00:00,  4.83it/s]



 - working on: species_betula-height_all


100%|██████████| 391/391 [01:02<00:00,  6.28it/s]



 - working on: species_betula-height_10-15


100%|██████████| 183/183 [00:29<00:00,  6.16it/s]



 - working on: species_betula-height_0-10


100%|██████████| 110/110 [00:19<00:00,  5.70it/s]



 - working on: species_betula-height_15-20


100%|██████████| 157/157 [00:28<00:00,  5.60it/s]



 - working on: species_betula-height_20-25


100%|██████████| 102/102 [00:18<00:00,  5.62it/s]



 - working on: species_betula-height_25+


100%|██████████| 21/21 [00:05<00:00,  3.57it/s]



 - working on: species_populus-height_all


100%|██████████| 342/342 [00:54<00:00,  6.29it/s]



 - working on: species_populus-height_10-15


100%|██████████| 98/98 [00:17<00:00,  5.59it/s]



 - working on: species_populus-height_0-10


100%|██████████| 64/64 [00:12<00:00,  5.12it/s]



 - working on: species_populus-height_15-20


100%|██████████| 107/107 [00:19<00:00,  5.52it/s]



 - working on: species_populus-height_20-25


100%|██████████| 116/116 [00:20<00:00,  5.77it/s]



 - working on: species_populus-height_25+


100%|██████████| 122/122 [00:21<00:00,  5.78it/s]


## Directly at regional level


In [29]:
#
# * Get input df --------------------------------------------------------------
df_in = nfi_raw.copy()
if run_only_subset:
    all_sites = df_in["idp"].unique().tolist()
    subset_sites = np.random.choice(
        all_sites, int(len(all_sites) * subset_fraction), replace=False
    ).tolist()
    df_in = df_in.query("idp in @subset_sites")

# * Get factorial setup -------------------------------------------------------
# ! Loop over all regions (not idp) but not over all heights, would take too long for now...
all_regions = ["reg", "dep", "gre", "ser", "hex"]

# Pick top 10 species + populus
all_species = (
    df_in["genus_lat"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
all_species = ["all"] + all_species
if "Populus" not in all_species:
    all_species.append("Populus")

# Pick all height classes
# + df_in["tree_height_class"].value_counts().index.tolist()all_heights.remove("Missing")
all_heights = ["all"]

# * Run loop ------------------------------------------------------------------
# Imports
from utilities import calculate_growth_mortality_optimized
from run_mp import run_mp

# Loop over species
for my_species in all_species:
    # Loop over heights
    for my_height in all_heights:
        # Get new df
        df_loop = df_in.copy()

        # ! Filter df
        # Filter for species
        if my_species != "all":
            df_loop = df_loop.query(f"genus_lat == '{my_species}'")

        # Filter for height
        if my_height != "all":
            df_loop = df_loop.query(f"tree_height_class == '{my_height}'")

        # Loop over regions
        for my_region in all_regions:
            # ! Get filename and print it
            if run_only_subset:
                my_dir = here(
                    str.lower(
                        f"data/tmp/nfi/spatio-temporal-analysis/direct/subset_{round(subset_fraction*100)}%_of_sites"
                    )
                )
            else:
                my_dir = here(
                    str.lower(f"data/tmp/nfi/spatio-temporal-analysis/direct/")
                )
            os.makedirs(my_dir, exist_ok=True)
            my_file = str.lower(
                f"species_{my_species}-height_{my_height}_region-{my_region}"
            )
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            # ! Add grouping variable
            my_region_tmp = my_region

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            if my_region != "idp":
                my_region = my_region + "_year"
                df_loop[my_region] = (
                    df_loop[my_region_tmp].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # ! Create list and run mp
            df_list = [
                group for name, group in df_loop.groupby(my_region, as_index=False)
            ]

            df_mp = run_mp(
                calculate_growth_mortality_optimized,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                grouping_variable=my_region,
            )

            # ! Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)


 - working on: species_all-height_all_region-reg


100%|██████████| 91/91 [00:18<00:00,  4.81it/s]



 - working on: species_all-height_all_region-dep


100%|██████████| 625/625 [01:43<00:00,  6.06it/s]



 - working on: species_all-height_all_region-gre


100%|██████████| 77/77 [00:15<00:00,  5.07it/s]



 - working on: species_all-height_all_region-ser


100%|██████████| 576/576 [01:30<00:00,  6.33it/s]



 - working on: species_all-height_all_region-hex


100%|██████████| 1296/1296 [03:31<00:00,  6.14it/s]



 - working on: species_quercus-height_all_region-reg


100%|██████████| 91/91 [00:19<00:00,  4.64it/s]



 - working on: species_quercus-height_all_region-dep


100%|██████████| 573/573 [01:34<00:00,  6.09it/s]



 - working on: species_quercus-height_all_region-gre


100%|██████████| 77/77 [00:15<00:00,  4.88it/s]



 - working on: species_quercus-height_all_region-ser


100%|██████████| 506/506 [01:21<00:00,  6.19it/s]



 - working on: species_quercus-height_all_region-hex


100%|██████████| 1029/1029 [02:45<00:00,  6.20it/s]



 - working on: species_pinus-height_all_region-reg


100%|██████████| 88/88 [00:17<00:00,  5.13it/s]



 - working on: species_pinus-height_all_region-dep


100%|██████████| 352/352 [00:58<00:00,  6.04it/s]



 - working on: species_pinus-height_all_region-gre


100%|██████████| 72/72 [00:15<00:00,  4.73it/s]



 - working on: species_pinus-height_all_region-ser


100%|██████████| 302/302 [00:48<00:00,  6.24it/s]



 - working on: species_pinus-height_all_region-hex


100%|██████████| 486/486 [01:13<00:00,  6.57it/s]



 - working on: species_fagus-height_all_region-reg


100%|██████████| 79/79 [00:15<00:00,  5.05it/s]



 - working on: species_fagus-height_all_region-dep


100%|██████████| 362/362 [00:59<00:00,  6.04it/s]



 - working on: species_fagus-height_all_region-gre


100%|██████████| 67/67 [00:13<00:00,  4.97it/s]



 - working on: species_fagus-height_all_region-ser


100%|██████████| 273/273 [00:45<00:00,  5.96it/s]



 - working on: species_fagus-height_all_region-hex


100%|██████████| 510/510 [01:24<00:00,  6.02it/s]



 - working on: species_carpinus-height_all_region-reg


100%|██████████| 68/68 [00:13<00:00,  4.88it/s]



 - working on: species_carpinus-height_all_region-dep


100%|██████████| 299/299 [00:48<00:00,  6.23it/s]



 - working on: species_carpinus-height_all_region-gre


100%|██████████| 51/51 [00:11<00:00,  4.50it/s]



 - working on: species_carpinus-height_all_region-ser


100%|██████████| 248/248 [00:39<00:00,  6.22it/s]



 - working on: species_carpinus-height_all_region-hex


100%|██████████| 460/460 [01:16<00:00,  6.04it/s]



 - working on: species_castanea-height_all_region-reg


100%|██████████| 77/77 [00:15<00:00,  4.99it/s]



 - working on: species_castanea-height_all_region-dep


100%|██████████| 277/277 [00:47<00:00,  5.79it/s]



 - working on: species_castanea-height_all_region-gre


100%|██████████| 56/56 [00:12<00:00,  4.63it/s]



 - working on: species_castanea-height_all_region-ser


100%|██████████| 241/241 [00:41<00:00,  5.83it/s]



 - working on: species_castanea-height_all_region-hex


100%|██████████| 353/353 [00:57<00:00,  6.18it/s]



 - working on: species_picea-height_all_region-reg


100%|██████████| 57/57 [00:12<00:00,  4.57it/s]



 - working on: species_picea-height_all_region-dep


100%|██████████| 190/190 [00:32<00:00,  5.81it/s]



 - working on: species_picea-height_all_region-gre


100%|██████████| 50/50 [00:11<00:00,  4.26it/s]



 - working on: species_picea-height_all_region-ser


100%|██████████| 151/151 [00:28<00:00,  5.33it/s]



 - working on: species_picea-height_all_region-hex


100%|██████████| 238/238 [00:39<00:00,  5.95it/s]



 - working on: species_abies-height_all_region-reg


100%|██████████| 51/51 [00:12<00:00,  4.21it/s]



 - working on: species_abies-height_all_region-dep


100%|██████████| 163/163 [00:28<00:00,  5.67it/s]



 - working on: species_abies-height_all_region-gre


100%|██████████| 52/52 [00:12<00:00,  4.27it/s]



 - working on: species_abies-height_all_region-ser


100%|██████████| 137/137 [00:24<00:00,  5.66it/s]



 - working on: species_abies-height_all_region-hex


100%|██████████| 210/210 [00:34<00:00,  6.08it/s]



 - working on: species_fraxinus-height_all_region-reg


100%|██████████| 74/74 [00:14<00:00,  4.95it/s]



 - working on: species_fraxinus-height_all_region-dep


100%|██████████| 307/307 [00:51<00:00,  5.91it/s]



 - working on: species_fraxinus-height_all_region-gre


100%|██████████| 67/67 [00:15<00:00,  4.32it/s]



 - working on: species_fraxinus-height_all_region-ser


100%|██████████| 263/263 [00:43<00:00,  6.00it/s]



 - working on: species_fraxinus-height_all_region-hex


100%|██████████| 385/385 [00:58<00:00,  6.55it/s]



 - working on: species_acer-height_all_region-reg


100%|██████████| 77/77 [00:16<00:00,  4.63it/s]



 - working on: species_acer-height_all_region-dep


100%|██████████| 287/287 [00:47<00:00,  6.03it/s]



 - working on: species_acer-height_all_region-gre


100%|██████████| 69/69 [00:14<00:00,  4.78it/s]



 - working on: species_acer-height_all_region-ser


100%|██████████| 246/246 [00:44<00:00,  5.50it/s]



 - working on: species_acer-height_all_region-hex


100%|██████████| 367/367 [00:57<00:00,  6.42it/s]



 - working on: species_populus-height_all_region-reg


100%|██████████| 76/76 [00:15<00:00,  5.04it/s]



 - working on: species_populus-height_all_region-dep


100%|██████████| 267/267 [00:42<00:00,  6.23it/s]



 - working on: species_populus-height_all_region-gre


100%|██████████| 55/55 [00:11<00:00,  4.71it/s]



 - working on: species_populus-height_all_region-ser


100%|██████████| 217/217 [00:35<00:00,  6.05it/s]



 - working on: species_populus-height_all_region-hex


100%|██████████| 314/314 [00:49<00:00,  6.35it/s]


## Spatial Patterns

- There are two approaches:

  - Direct: Aggregate trees to given area and calculate change _directly_ at this level
    - To assess the uncertainty per area and year, this approach requires a bootstrapping
  - Aggregated: Aggregate trees to site and take the mean over the given area
    - To assess the uncertainty per area and year, this approach simply reports the mean and sd across sites

- Plus, we are interested in all species and in species subsets.


### Direct Approach


In [15]:
# Imports
from utilities import calculate_growth_mortality
from run_mp import run_mp

# Get factorial setup
all_regions = ["idp", "reg", "dep", "gre", "ser", "hex"]
all_species = ["all"] + nfi_raw["genus_lat"].value_counts().sort_values(ascending=False)
all_species = all_species.head(10).index.tolist()
all_heights = ["all"] + df_merged["tree_height_class"].value_counts().index.tolist()
all_heights.remove("Missing")

# Make sure poplars are in there
if "Populus" not in all_species:
    all_species.append("Populus")

# Get input df
df_in = nfi_raw.copy()

# Loop over species
for my_species in all_species:
    # Loop over heights
    for my_height in all_heights:
        # Get new df
        df_loop = df_in.copy()

        # Filter for species
        if my_species != "all":
            df_loop = df_loop.query(f"genus_lat == '{my_species}'")

        # Filter for height
        if my_height != "all":
            df_loop = df_loop.query(f"tree_height_class == '{my_height}'")

        # Loop over regions
        for my_region in all_regions:
            # Get filename and print it
            my_dir = here(
                str.lower(f"data/tmp/nfi/spatio-temporal-analysis/direct_approach")
            )
            os.makedirs(my_dir, exist_ok=True)
            my_file = str.lower(f"species_{my_species}-area_{my_region}")
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            my_region_tmp = my_region

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            if my_region != "idp":
                my_region = my_region + "_year"
                df_loop[my_region] = (
                    df_loop[my_region_tmp].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # Create list and run mp
            df_list = [
                group for name, group in df_loop.groupby(my_region, as_index=False)
            ]
            df_mp = run_mp(
                calculate_growth_mortality,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                min_trees_per_plot=1,
                grouping_variable=my_region,
            )

            # Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)

 - File already exists: species_all-area_idp, skipping it

 - working on: species_all-area_reg


100%|██████████| 91/91 [00:24<00:00,  3.73it/s]



 - working on: species_all-area_dep


100%|██████████| 649/649 [02:20<00:00,  4.61it/s]



 - working on: species_all-area_gre


100%|██████████| 77/77 [00:22<00:00,  3.36it/s]



 - working on: species_all-area_ser


100%|██████████| 602/602 [02:22<00:00,  4.23it/s]



 - working on: species_all-area_hex


100%|██████████| 1582/1582 [05:11<00:00,  5.09it/s]



 - working on: species_quercus-area_idp


 91%|█████████▏| 20905/22900 [59:34<22:21,  1.49it/s] 

### Correct Approach


In [None]:
# Load the idp-level data for each species and calculate the mean and sd over desired region

## Temporal Patterns

- For the temporal patterns, we can again do the holistic or the mean-approach:
- For now, to save time, I am only doing the mean approach


### Mean-Approach

- For this, I need factorial species-height subsets but only at the idp level


In [None]:
# Calculate metrics of change at region-level
all_regions = ["idp"]
all_species = ["all"] + df_merged["genus_lat"].value_counts().head(10).index.tolist()
all_heights = ["all"] + df_merged["tree_height_class"].value_counts().index.tolist()
all_heights.remove("Missing")

from utilities import calculate_growth_mortality
from run_mp import run_mp

df_in = df_merged.copy()
group_per_height = True

for my_species in all_species:
    for my_height in all_heights:
        # Get new df
        df_loop = df_in.copy()
        # Filter for species
        if my_species != "all":
            df_loop = df_loop.query(f"genus_lat == '{my_species}'")
        # Filter for height
        if my_height != "all":
            df_loop = df_loop.query(f"tree_height_class == '{my_height}'")

        for my_region in all_regions:
            my_file = str.lower(
                f"species_{my_species}__tree_heights_{my_height}__area_{my_region}"
            )

            print(str.lower(f"\n - Working on: {my_file}"))

            my_region_tmp = my_region

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this information
            if my_region != "idp":
                my_region = my_region + "_year"
                df_loop[my_region] = (
                    df_loop[my_region_tmp].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # Create list and run mp
            df_list = [
                group for name, group in df_loop.groupby(my_region, as_index=False)
            ]
            df_mp = run_mp(
                calculate_growth_mortality,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                min_trees_per_plot=1,
                grouping_variable=my_region,
            )

            # Save df
            my_dir = here(str.lower(f"data/tmp/dfs_metrics_of_change/area-level/"))
            if not os.path.exists(my_dir):
                os.makedirs(my_dir)
            df_mp.reset_index(drop=True).to_feather(f"{my_dir}/{my_file}.feather")

---

## Load Data


### Maps


In [6]:
shp_hex10 = gpd.read_file(here("qgis/hexmaps/hexmap_10000.geojson"))
shp_hex12 = gpd.read_file(here("qgis/hexmaps/hexmap_12500.geojson"))
shp_hex25 = gpd.read_file(here("qgis/hexmaps/hexmap_25000.geojson"))

DriverError: /Users/pascal/repos/padasch/ifn_analysis/qgis/hexmaps/hexmap_10000.geojson: No such file or directory

### NFI Data


In [2]:
# Load NFI data ready for analysis
data = pd.read_feather("nfi_ready_for_spatial_temporal_analysis.feather")

# Move ser and gre columns to the start
data.insert(0, "ser", data.pop("ser"))
data.insert(0, "gre", data.pop("gre"))
data.insert(0, "lat", data.pop("lat"))
data.insert(0, "lon", data.pop("lon"))
data.insert(0, "campagne_1", data.pop("campagne_1"))
data.insert(0, "idp", data.pop("idp"))
data.head()

Unnamed: 0,idp,campagne_1,lon,lat,gre,ser,hdec,ddec,sfpied,sfdorge_1,...,tform_1,plisi,tpespar1_1,prelev5,videplant,incid_1,lat_fr,lon_fr,greco,reg
0,632691,2011,-2.842824,48.337505,A,A11,10.8,,0,Missing,...,,0.0,73,,,0.0,6820144.0,267415.027897,A,53
1,632691,2011,-2.842824,48.337505,A,A11,18.4,,0,Missing,...,,0.0,73,,,0.0,6820144.0,267415.027897,A,53
2,632691,2011,-2.842824,48.337505,A,A11,13.0,,0,Missing,...,,0.0,73,,,0.0,6820144.0,267415.027897,A,53
3,632691,2011,-2.842824,48.337505,A,A11,10.4,,0,Missing,...,,0.0,73,,,0.0,6820144.0,267415.027897,A,53
4,632691,2011,-2.842824,48.337505,A,A11,21.0,,0,Missing,...,,0.0,73,,,0.0,6820144.0,267415.027897,A,53


In [13]:
# Create dictionary of genus-espar
dict_species = data[["genus_lat", "espar_red"]].drop_duplicates().reset_index(drop=True)
dict_species["espar_red"].value_counts(ascending=True).tail(10)

espar_red
28     1
27     1
3      1
41     1
25     2
57     2
29     7
23     9
68    11
49    19
Name: count, dtype: int64

In [16]:
dict_species.query("espar_red == '68'")

Unnamed: 0,genus_lat,espar_red
0,Chamaecyparis,68
70,Pinus,68
79,Taxodium,68
81,Cupressus,68
82,Thuja,68
87,Tsuga,68
89,Sequoia,68
99,Cedrus,68
100,Conifer,68
102,Cryptomeria,68


In [17]:
dict_species.query("genus_lat == 'Abies'")

Unnamed: 0,genus_lat,espar_red
23,Abies,61
71,Abies,72
73,Abies,71
97,Abies,70
108,Abies,68


### Metric of Change


In [9]:
# Calculate on site-level for aggregation to larger region later on
df_list = data.groupby("idp", as_index=False)
df_list = [group for name, group in df_list]
df_site = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    divide_by_nplots=True,
    grouping_variable=my_group,
)

  8%|▊         | 3055/40221 [06:49<1:23:06,  7.45it/s]


KeyboardInterrupt: 

In [None]:
# Regional Level
df_list = data.groupby("reg", as_index=False)
df_list = [group for name, group in df_list]
df_reg = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    grouping_variable="reg",
    divide_by_nplots=True,
)

In [None]:
# Department Level
df_list = data.groupby("dep", as_index=False)
df_list = [group for name, group in df_list]
df_dep = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    grouping_variable="dep",
    divide_by_nplots=True,
)

In [None]:
# Sylvoregion Level
df_list = data.groupby("ser", as_index=False)
df_list = [group for name, group in df_list]
df_ser = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    grouping_variable="ser",
    divide_by_nplots=True,
)

In [None]:
# Greco Level
df_list = data.groupby("gre", as_index=False)
df_list = [group for name, group in df_list]
df_gre = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    grouping_variable="gre",
    divide_by_nplots=True,
)

## Calculate Metrics of Change

**Target:** Mean of Metric within Region

Routine:

- Filter for tree subset
- For each site, calculate metrics of change
- Group sites by given variable (ser, gre, dep, reg, hex)
- Calculate the mean and sd for that group
- Plot mean and sd for that group

**Alternative:** Calculate total change per region (aggregate all sites to one instead of taking the mean across sites).

**Maps:**

- Plot The difference relative to 2010 Campagnes!


### All species


### Top 10 Species


---
