# Spatial Patterns of Change


## Imports and Functions


In [1]:
# Data wrangling
import pandas as pd
import numpy as np
import geopandas as gpd
import h3pandas

# Data visualisation
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pylab import f

# My functions
import sys

sys.path.insert(0, "../../src")
from run_mp import *
from utilities import *
from random_forest_utils import *

# Other
import glob
import os
from os import error
import warnings
import chime
from pyprojroot import here

chime.theme("mario")

# Magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

## User Input


In [2]:
# True: recompute and overwrite result files even if they already exist on disk
force_rerunning_all_sites = False
# True: run the calculations on a random subset of the sites only
run_only_subset = False
# Share of sites drawn when run_only_subset is True (0.10 = 10 % of all sites)
subset_fraction = 0.10

## Load Data


In [3]:
# Load NFI data
# NOTE(review): get_final_nfi_data_for_analysis is not imported by name; it is
# presumably provided by one of the star imports in the first cell - confirm.
nfi_raw = get_final_nfi_data_for_analysis()
# Display (n_rows, n_columns) as a quick check that the load worked
nfi_raw.shape

(549255, 193)

## Calculate Metrics of Change


### At site-level


In [4]:
# Calculate growth and mortality per site (idp) for every combination of genus
# and tree-height class. Each combination is written to its own feather file;
# combinations whose file already exists are skipped unless
# force_rerunning_all_sites is True.

# * Get input df --------------------------------------------------------------
df_in = nfi_raw.copy()
if run_only_subset:
    # Seeded generator: every run draws the same sites, so files cached by an
    # earlier run (and skipped below) always belong to the same subset.
    subset_rng = np.random.default_rng(42)
    all_sites = df_in["idp"].unique().tolist()
    subset_sites = subset_rng.choice(
        all_sites, int(len(all_sites) * subset_fraction), replace=False
    ).tolist()
    df_in = df_in.query("idp in @subset_sites")

# * Get factorial setup -------------------------------------------------------
# ! Pick only idp level here but loop over all species and heights
all_regions = ["idp"]  # ["idp", "reg", "dep", "gre", "ser", "hex"]

# Pick the 10 most frequent genera and always include Populus
all_species = (
    df_in["genus_lat"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
all_species = ["all"] + all_species
if "Populus" not in all_species:
    all_species.append("Populus")

# Pick all height classes except "Missing".
# Filtering (instead of list.remove) does not fail when "Missing" is absent,
# e.g. in a small subset of sites.
all_heights = ["all"] + [
    height_class
    for height_class in df_in["tree_height_class"].value_counts().index.tolist()
    if height_class != "Missing"
]

# * Output directory ----------------------------------------------------------
# Does not depend on species, height or region, so it is created once up front
if run_only_subset:
    my_dir = here(
        str.lower(
            f"data/tmp/nfi/growth_and_mortality_data/idp/subset_{round(subset_fraction*100)}%_of_sites"
        )
    )
else:
    my_dir = here(str.lower(f"data/tmp/nfi/growth_and_mortality_data/idp"))
os.makedirs(my_dir, exist_ok=True)

# * Run loop ------------------------------------------------------------------
# Imports
from utilities import calculate_growth_mortality_optimized
from run_mp import run_mp

# Loop over species
for species_counter, my_species in enumerate(all_species, start=1):
    # Verbose
    display(f" --- Working on species Nr. {species_counter} : {my_species} --- ")

    # Loop over heights
    for my_height in all_heights:
        # Filtered data for this species/height; built lazily so that no
        # copy of the full frame is made when every output file is skipped
        df_subset = None

        # Loop over regions
        for my_region in all_regions:
            # ! Get filename and print it
            my_file = str.lower(f"species_{my_species}-height_{my_height}")
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            # ! Filter df (only once per species/height combination)
            if df_subset is None:
                df_subset = df_in
                if my_species != "all":
                    df_subset = df_subset[df_subset["genus_lat"] == my_species]
                if my_height != "all":
                    df_subset = df_subset[df_subset["tree_height_class"] == my_height]
                # Explicit copy so that adding columns below never touches df_in
                df_subset = df_subset.copy()
            df_loop = df_subset

            # ! Add grouping variable
            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            # (a separate name is used so the loop variable is not overwritten)
            if my_region == "idp":
                grouping_variable = my_region
            else:
                grouping_variable = my_region + "_year"
                df_loop[grouping_variable] = (
                    df_loop[my_region].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # ! Create list and run mp
            df_list = [
                group
                for _, group in df_loop.groupby(grouping_variable, as_index=False)
            ]

            # Nothing to compute for an empty selection; pd.concat would have
            # no objects to combine
            if not df_list:
                print(f" - No data for: {my_file}, skipping it")
                continue

            df_mp = run_mp(
                calculate_growth_mortality_optimized,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                grouping_variable=grouping_variable,
            )

            # ! Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)

' --- Working on species Nr. 1 : all --- '

 - File already exists: species_all-height_all, skipping it
 - File already exists: species_all-height_10-15, skipping it
 - File already exists: species_all-height_0-10, skipping it
 - File already exists: species_all-height_15-20, skipping it
 - File already exists: species_all-height_20-25, skipping it
 - File already exists: species_all-height_25+, skipping it


' --- Working on species Nr. 2 : Quercus --- '

 - File already exists: species_quercus-height_all, skipping it
 - File already exists: species_quercus-height_10-15, skipping it
 - File already exists: species_quercus-height_0-10, skipping it
 - File already exists: species_quercus-height_15-20, skipping it
 - File already exists: species_quercus-height_20-25, skipping it
 - File already exists: species_quercus-height_25+, skipping it


' --- Working on species Nr. 3 : Pinus --- '

 - File already exists: species_pinus-height_all, skipping it
 - File already exists: species_pinus-height_10-15, skipping it
 - File already exists: species_pinus-height_0-10, skipping it
 - File already exists: species_pinus-height_15-20, skipping it
 - File already exists: species_pinus-height_20-25, skipping it
 - File already exists: species_pinus-height_25+, skipping it


' --- Working on species Nr. 4 : Fagus --- '

 - File already exists: species_fagus-height_all, skipping it
 - File already exists: species_fagus-height_10-15, skipping it
 - File already exists: species_fagus-height_0-10, skipping it
 - File already exists: species_fagus-height_15-20, skipping it
 - File already exists: species_fagus-height_20-25, skipping it
 - File already exists: species_fagus-height_25+, skipping it


' --- Working on species Nr. 5 : Carpinus --- '

 - File already exists: species_carpinus-height_all, skipping it
 - File already exists: species_carpinus-height_10-15, skipping it
 - File already exists: species_carpinus-height_0-10, skipping it
 - File already exists: species_carpinus-height_15-20, skipping it
 - File already exists: species_carpinus-height_20-25, skipping it
 - File already exists: species_carpinus-height_25+, skipping it


' --- Working on species Nr. 6 : Castanea --- '

 - File already exists: species_castanea-height_all, skipping it
 - File already exists: species_castanea-height_10-15, skipping it
 - File already exists: species_castanea-height_0-10, skipping it
 - File already exists: species_castanea-height_15-20, skipping it
 - File already exists: species_castanea-height_20-25, skipping it
 - File already exists: species_castanea-height_25+, skipping it


' --- Working on species Nr. 7 : Picea --- '

 - File already exists: species_picea-height_all, skipping it
 - File already exists: species_picea-height_10-15, skipping it
 - File already exists: species_picea-height_0-10, skipping it
 - File already exists: species_picea-height_15-20, skipping it
 - File already exists: species_picea-height_20-25, skipping it
 - File already exists: species_picea-height_25+, skipping it


' --- Working on species Nr. 8 : Abies --- '

 - File already exists: species_abies-height_all, skipping it
 - File already exists: species_abies-height_10-15, skipping it
 - File already exists: species_abies-height_0-10, skipping it
 - File already exists: species_abies-height_15-20, skipping it
 - File already exists: species_abies-height_20-25, skipping it
 - File already exists: species_abies-height_25+, skipping it


' --- Working on species Nr. 9 : Fraxinus --- '

 - File already exists: species_fraxinus-height_all, skipping it
 - File already exists: species_fraxinus-height_10-15, skipping it
 - File already exists: species_fraxinus-height_0-10, skipping it
 - File already exists: species_fraxinus-height_15-20, skipping it
 - File already exists: species_fraxinus-height_20-25, skipping it
 - File already exists: species_fraxinus-height_25+, skipping it


' --- Working on species Nr. 10 : Acer --- '

 - File already exists: species_acer-height_all, skipping it
 - File already exists: species_acer-height_10-15, skipping it
 - File already exists: species_acer-height_0-10, skipping it
 - File already exists: species_acer-height_15-20, skipping it
 - File already exists: species_acer-height_20-25, skipping it
 - File already exists: species_acer-height_25+, skipping it


' --- Working on species Nr. 11 : Betula --- '

 - File already exists: species_betula-height_all, skipping it
 - File already exists: species_betula-height_10-15, skipping it
 - File already exists: species_betula-height_0-10, skipping it
 - File already exists: species_betula-height_15-20, skipping it
 - File already exists: species_betula-height_20-25, skipping it
 - File already exists: species_betula-height_25+, skipping it


' --- Working on species Nr. 12 : Populus --- '

 - File already exists: species_populus-height_all, skipping it
 - File already exists: species_populus-height_10-15, skipping it
 - File already exists: species_populus-height_0-10, skipping it
 - File already exists: species_populus-height_15-20, skipping it
 - File already exists: species_populus-height_20-25, skipping it
 - File already exists: species_populus-height_25+, skipping it


### At region-level - directly


In [5]:
#
# Calculate growth and mortality directly at region-by-year level for every
# genus (height classes pooled). Each genus/region combination is written to
# its own feather file; combinations whose file already exists are skipped
# unless force_rerunning_all_sites is True.

# * Get input df --------------------------------------------------------------
df_in = nfi_raw.copy()
if run_only_subset:
    # Seeded generator: every run draws the same sites, so files cached by an
    # earlier run (and skipped below) always belong to the same subset.
    subset_rng = np.random.default_rng(42)
    all_sites = df_in["idp"].unique().tolist()
    subset_sites = subset_rng.choice(
        all_sites, int(len(all_sites) * subset_fraction), replace=False
    ).tolist()
    df_in = df_in.query("idp in @subset_sites")

# * Get factorial setup -------------------------------------------------------
# ! Loop over all regions (not idp) but not over all heights, would take too long for now...
all_regions = ["reg", "dep", "gre", "ser", "hex"]

# Pick the 10 most frequent genera and always include Populus
all_species = (
    df_in["genus_lat"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
all_species = ["all"] + all_species
if "Populus" not in all_species:
    all_species.append("Populus")

# Height classes are pooled here. To loop over them as well, extend the list
# with df_in["tree_height_class"].value_counts().index.tolist() and drop "Missing".
all_heights = ["all"]

# * Output directory ----------------------------------------------------------
# Does not depend on species, height or region, so it is created once up front
if run_only_subset:
    my_dir = here(
        str.lower(
            f"data/tmp/nfi/growth_and_mortality_data/direct/subset_{round(subset_fraction*100)}%_of_sites"
        )
    )
else:
    my_dir = here(str.lower(f"data/tmp/nfi/growth_and_mortality_data/direct/"))
os.makedirs(my_dir, exist_ok=True)

# * Run loop ------------------------------------------------------------------
# Imports
from utilities import calculate_growth_mortality_optimized
from run_mp import run_mp

# Loop over species
for species_counter, my_species in enumerate(all_species, start=1):
    # Verbose
    display(f" --- Working on species Nr. {species_counter} : {my_species} --- ")

    # Loop over heights
    for my_height in all_heights:
        # Filtered data for this species/height; built lazily so that no
        # copy of the full frame is made when every output file is skipped
        df_subset = None

        # Loop over regions
        for my_region in all_regions:
            # ! Get filename and print it
            my_file = str.lower(
                f"species_{my_species}-height_{my_height}_region-{my_region}"
            )
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            # ! Filter df (only once per species/height combination)
            if df_subset is None:
                df_subset = df_in
                if my_species != "all":
                    df_subset = df_subset[df_subset["genus_lat"] == my_species]
                if my_height != "all":
                    df_subset = df_subset[df_subset["tree_height_class"] == my_height]
                # Explicit copy so that adding columns below never touches df_in
                df_subset = df_subset.copy()
            df_loop = df_subset

            # ! Add grouping variable
            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            # (a separate name is used so the loop variable is not overwritten)
            if my_region == "idp":
                grouping_variable = my_region
            else:
                grouping_variable = my_region + "_year"
                df_loop[grouping_variable] = (
                    df_loop[my_region].astype(str)
                    + "_"
                    + df_loop["campagne_1"].astype(str)
                )

            # ! Create list and run mp
            df_list = [
                group
                for _, group in df_loop.groupby(grouping_variable, as_index=False)
            ]

            # Nothing to compute for an empty selection; pd.concat would have
            # no objects to combine
            if not df_list:
                print(f" - No data for: {my_file}, skipping it")
                continue

            df_mp = run_mp(
                calculate_growth_mortality_optimized,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                grouping_variable=grouping_variable,
            )

            # ! Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)

' --- Working on species Nr. 1 : all --- '

 - File already exists: species_all-height_all_region-reg, skipping it
 - File already exists: species_all-height_all_region-dep, skipping it
 - File already exists: species_all-height_all_region-gre, skipping it
 - File already exists: species_all-height_all_region-ser, skipping it
 - File already exists: species_all-height_all_region-hex, skipping it


' --- Working on species Nr. 2 : Quercus --- '

 - File already exists: species_quercus-height_all_region-reg, skipping it
 - File already exists: species_quercus-height_all_region-dep, skipping it
 - File already exists: species_quercus-height_all_region-gre, skipping it
 - File already exists: species_quercus-height_all_region-ser, skipping it
 - File already exists: species_quercus-height_all_region-hex, skipping it


' --- Working on species Nr. 3 : Pinus --- '

 - File already exists: species_pinus-height_all_region-reg, skipping it
 - File already exists: species_pinus-height_all_region-dep, skipping it
 - File already exists: species_pinus-height_all_region-gre, skipping it
 - File already exists: species_pinus-height_all_region-ser, skipping it
 - File already exists: species_pinus-height_all_region-hex, skipping it


' --- Working on species Nr. 4 : Fagus --- '

 - File already exists: species_fagus-height_all_region-reg, skipping it
 - File already exists: species_fagus-height_all_region-dep, skipping it
 - File already exists: species_fagus-height_all_region-gre, skipping it
 - File already exists: species_fagus-height_all_region-ser, skipping it
 - File already exists: species_fagus-height_all_region-hex, skipping it


' --- Working on species Nr. 5 : Carpinus --- '

 - File already exists: species_carpinus-height_all_region-reg, skipping it
 - File already exists: species_carpinus-height_all_region-dep, skipping it
 - File already exists: species_carpinus-height_all_region-gre, skipping it
 - File already exists: species_carpinus-height_all_region-ser, skipping it
 - File already exists: species_carpinus-height_all_region-hex, skipping it


' --- Working on species Nr. 6 : Castanea --- '

 - File already exists: species_castanea-height_all_region-reg, skipping it
 - File already exists: species_castanea-height_all_region-dep, skipping it
 - File already exists: species_castanea-height_all_region-gre, skipping it
 - File already exists: species_castanea-height_all_region-ser, skipping it
 - File already exists: species_castanea-height_all_region-hex, skipping it


' --- Working on species Nr. 7 : Picea --- '

 - File already exists: species_picea-height_all_region-reg, skipping it
 - File already exists: species_picea-height_all_region-dep, skipping it
 - File already exists: species_picea-height_all_region-gre, skipping it
 - File already exists: species_picea-height_all_region-ser, skipping it
 - File already exists: species_picea-height_all_region-hex, skipping it


' --- Working on species Nr. 8 : Abies --- '

 - File already exists: species_abies-height_all_region-reg, skipping it
 - File already exists: species_abies-height_all_region-dep, skipping it
 - File already exists: species_abies-height_all_region-gre, skipping it
 - File already exists: species_abies-height_all_region-ser, skipping it
 - File already exists: species_abies-height_all_region-hex, skipping it


' --- Working on species Nr. 9 : Fraxinus --- '

 - File already exists: species_fraxinus-height_all_region-reg, skipping it
 - File already exists: species_fraxinus-height_all_region-dep, skipping it
 - File already exists: species_fraxinus-height_all_region-gre, skipping it
 - File already exists: species_fraxinus-height_all_region-ser, skipping it
 - File already exists: species_fraxinus-height_all_region-hex, skipping it


' --- Working on species Nr. 10 : Acer --- '

 - File already exists: species_acer-height_all_region-reg, skipping it
 - File already exists: species_acer-height_all_region-dep, skipping it
 - File already exists: species_acer-height_all_region-gre, skipping it
 - File already exists: species_acer-height_all_region-ser, skipping it
 - File already exists: species_acer-height_all_region-hex, skipping it


' --- Working on species Nr. 11 : Betula --- '

 - File already exists: species_betula-height_all_region-reg, skipping it
 - File already exists: species_betula-height_all_region-dep, skipping it
 - File already exists: species_betula-height_all_region-gre, skipping it
 - File already exists: species_betula-height_all_region-ser, skipping it
 - File already exists: species_betula-height_all_region-hex, skipping it


' --- Working on species Nr. 12 : Populus --- '

 - File already exists: species_populus-height_all_region-reg, skipping it
 - File already exists: species_populus-height_all_region-dep, skipping it
 - File already exists: species_populus-height_all_region-gre, skipping it
 - File already exists: species_populus-height_all_region-ser, skipping it
 - File already exists: species_populus-height_all_region-hex, skipping it


### At region-level - aggregated


In [6]:
# Locate the folder with the site-level (idp) results: the subset folder when
# only a fraction of the sites was processed, the full folder otherwise
my_dir = (
    here(
        f"data/tmp/nfi/growth_and_mortality_data/idp/subset_{round(subset_fraction*100)}%_of_sites"
    )
    if run_only_subset
    else here(f"data/tmp/nfi/growth_and_mortality_data/idp/")
)

# Keep only the feather files found in that folder and preview the first three
my_dir_files = list(
    filter(lambda entry: entry.endswith(".feather"), os.listdir(my_dir))
)
my_dir_files[:3]

['species_fraxinus-height_0-10.feather',
 'species_acer-height_0-10.feather',
 'species_quercus-height_all.feather']

In [7]:
# Lookup table with one row per distinct combination of the columns below
# (site id, campaign and the five region identifiers)
region_lookup_columns = ["idp", "campagne_1", "reg", "dep", "gre", "ser", "hex"]
idp_region_dict = nfi_raw.loc[:, region_lookup_columns].drop_duplicates()

In [8]:
#
# Aggregate the site-level results to region-by-year level: for every
# site-level file and every region type, take mean and standard deviation of
# all metrics per region_year (n_plots is summed) and save the result.
# Relies on my_dir and my_dir_files as set by the file-listing cell above.

# * Preparation ---------------------------------------------------------------
# Set all regions
all_regions = ["reg", "dep", "gre", "ser", "hex"]

# Get regions dictionary
idp_region_dict = nfi_raw[["idp", "campagne_1"] + all_regions].drop_duplicates()

# For every region, attach variable region_year to df
for my_region in all_regions:
    idp_region_dict[my_region + "_year"] = (
        idp_region_dict[my_region].astype(str)
        + "_"
        + idp_region_dict["campagne_1"].astype(str)
    )

# Output directory is identical for all files and regions, so create it once
if run_only_subset:
    new_dir = here(
        f"data/tmp/nfi/growth_and_mortality_data/aggregated/subset_{round(subset_fraction*100)}%_of_sites"
    )
else:
    new_dir = here(f"data/tmp/nfi/growth_and_mortality_data/aggregated/")
os.makedirs(new_dir, exist_ok=True)

# * Loop over all files -------------------------------------------------------
for my_file in my_dir_files:
    # Read the site-level file once; it is reused for every region below
    df_site = pd.read_feather(f"{my_dir}/{my_file}")

    # Loop over all regions
    for my_region in all_regions:
        region_year = f"{my_region}_year"

        # Attach region_year information and drop idp to avoid confusion
        df_loop = df_site.merge(
            idp_region_dict[["idp", region_year]], on="idp"
        ).drop("idp", axis=1)

        # Group by region_year and take mean and sd of all variables
        # Except for number of plots, which should be summed up
        my_agg = {
            var: ["mean", "std"]
            for var in df_loop.columns
            if var not in [region_year, "n_plots"]
        }
        my_agg["n_plots"] = "sum"

        grouped_df = df_loop.groupby(region_year).agg(my_agg)
        # Flatten the (variable, statistic) column index to "variable_statistic"
        grouped_df.columns = ["_".join((var, stat)) for var, stat in grouped_df.columns]
        grouped_df = grouped_df.reset_index()

        # Save file
        new_file = my_file.replace(".feather", f"_region-{my_region}.feather")
        grouped_df.to_feather(f"{new_dir}/{new_file}")

## Spatial Patterns


### Direct Method


In [None]:
# Collect the region-level ("direct") result files that are used for the maps.

# Get directory
if run_only_subset:
    my_dir = here(
        f"data/tmp/nfi/growth_and_mortality_data/direct/subset_{round(subset_fraction*100)}%_of_sites"
    )
else:
    my_dir = here(f"data/tmp/nfi/growth_and_mortality_data/direct/")


# List files in directory
all_files = sorted(glob.glob(my_dir.as_posix() + "/*.feather"))

# ! For spatial maps, we are not interested in height classes
# Keep only files with pattern "height_all". The pattern is checked against
# the file name only, so a directory name can never produce a false match.
all_files = [x for x in all_files if "height_all" in os.path.basename(x)]

print(len(all_files))
print("All files:")
display(all_files[:3])

# NOTE: these three lists are whatever the cells above assigned last
print("All regions: ", all_regions)
print("All species: ", all_species)
print("All heights: ", all_heights)

60


['/Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_abies-height_all_region-dep.feather',
 '/Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_abies-height_all_region-gre.feather',
 '/Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_abies-height_all_region-hex.feather']

In [37]:
# ! Shortcut to plot the species of interest one-by-one
my_species = "all"

# Keep every file whose path carries the tag of the chosen species
groups_of_files = list(
    filter(lambda path: f"species_{my_species}" in path, all_files)
)

print(f"Running files:")
for file_path in groups_of_files:
    print(f" - {file_path}")

# At most 10 groups, and never more groups than there are files
ngroups = min(10, len(groups_of_files))
groups_of_files = split_df_into_list_of_group_or_ns(groups_of_files, ngroups)

Running files:
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-dep.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-gre.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-hex.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-reg.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-ser.feather
Splitting df into 5 random groups


In [31]:
# Inspect the fifth group (index 4) returned by the split in the previous cell
groups_of_files[4]

array(['/Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-ser.feather'],
      dtype='<U128')

In [36]:
# NOTE: Debugging call for a single group of files, left commented out.
# The output recorded for this cell is
# "UnboundLocalError: local variable 'all_metrics' referenced before assignment".
# # Test for one group
# make_plots_per_file_parallel(
#     groups_of_files[4],
#     method="direct",
#     run_only_subset=run_only_subset,
#     subset_fraction=subset_fraction,
#     verbose=True,
# )

UnboundLocalError: local variable 'all_metrics' referenced before assignment

In [38]:
from utilities import make_plots_per_file_parallel

# ! Shortcut to plot the species of interest one-by-one
my_species = "all"

# Collect every file whose path carries the tag of the chosen species
groups_of_files = []
for candidate in all_files:
    if f"species_{my_species}" in candidate:
        groups_of_files.append(candidate)

print(f"Running files:")
for selected_file in groups_of_files:
    print(f" - {selected_file}")

# At most 10 groups, and never more groups than there are files
ngroups = min(10, len(groups_of_files))
groups_of_files = split_df_into_list_of_group_or_ns(groups_of_files, ngroups)


# Run mp
# Keyword arguments passed to run_mp in addition to its own options
plot_options = dict(
    method="direct",
    run_only_subset=run_only_subset,
    subset_fraction=subset_fraction,
    verbose=False,
)
run_mp(
    make_plots_per_file_parallel,
    groups_of_files,
    progress_bar=True,
    num_cores=10,
    **plot_options,
)

Running files:
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-dep.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-gre.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-hex.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-reg.feather
 - /Users/pascal/repos/padasch/ifn_analysis/data/tmp/nfi/growth_and_mortality_data/direct/species_all-height_all_region-ser.feather
Splitting df into 5 random groups


  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [48:04<00:00, 576.89s/it]


[None, None, None, None, None]

## Temporal Patterns


### Direct

- Not implemented yet: this requires mortality rates calculated directly at region level for every height subset, which takes a long time to compute and has therefore not been done yet.


### Aggregated


# 🚧 Old Code


## Old Code for Mapping Divergent colors nicely... not working


In [None]:
# Draw one map per year of the variable fig_dic["var"], sharing one colorbar.
# Expects these names to exist already (none is defined in this cell):
#   df_loop   - geo data frame with a "year" column and the column fig_dic["var"]
#   fig_dic   - dict with keys "default_cbar", "var", "cmap", "legend", "main"
#   sp_france - outline to draw on every map (its loading is commented out below)
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import geopandas as gpd
import numpy as np
import matplotlib.colors as colors
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap


# Use some nice font
plt.rcParams["font.sans-serif"] = "DejaVu Sans"

# Load the data
gdf = df_loop.copy()  # .query("year != 2016")
# sp_france = get_shp_of_region("cty")  # Shapefile of France for contour # TODO TURN ON AGAIN!

# Unique years to create subplots for
unique_years = sorted(gdf["year"].unique())

# Set up figure and GridSpec
# Two rows of maps; the column count follows the number of years so that
# every year gets a panel (7 years give 4 columns)
n_cols = max(1, int(np.ceil(len(unique_years) / 2)))
fig = plt.figure(figsize=(15, 8))

# Allocate the last column for the colorbar and use the 2 x n_cols grid for the maps
gs = gridspec.GridSpec(
    2,
    n_cols + 1,
    height_ratios=[1, 1],
    width_ratios=[1] * n_cols + [0.025],
)

# ! Color normalization and colormap ------------------------------------------
if fig_dic["default_cbar"]:
    # Default: sequential scale from 0 to the data maximum
    data_min = 0
    data_max = gdf[fig_dic["var"]].max()
    cmap = fig_dic["cmap"]

else:
    # Create a custom divergent colormap that is white at zero
    data_max = gdf[fig_dic["var"]].max()
    data_min = gdf[fig_dic["var"]].min()

    if data_min > 0:
        raise ValueError("Minimum value for divergent cmap is above 0!")
    if data_max < 0:
        raise ValueError("Maximum value for divergent cmap is below 0!")

    # If one side is very small, widen it to at least 10% of the other side
    neg_extent = abs(data_min)
    pos_extent = abs(data_max)
    data_min = -max(neg_extent, 0.1 * pos_extent)
    data_max = max(pos_extent, 0.1 * neg_extent)
    data_center = 0

    total_range = data_max - data_min
    if total_range == 0:
        raise ValueError("All values are 0, cannot build a divergent cmap!")

    # Position of zero on a linear 0-1 scale between data_min and data_max
    neg_proportion = (data_center - data_min) / total_range

    ## Create the color list, which stretches more on the side with the larger range
    list_colors = [(0, "red"), (neg_proportion, "white"), (1, "blue")]

    # Kept in a local variable so that fig_dic is not modified by plotting
    cmap = LinearSegmentedColormap.from_list("custom_diverging", list_colors)

# Linear normalization for both cases. The custom colormap already puts white
# at the position of zero on a linear scale; combining it with TwoSlopeNorm
# (which maps zero to 0.5) would shift white away from zero.
norm = colors.Normalize(vmin=data_min, vmax=data_max)

# Scalar mappable that feeds the shared colorbar
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)

# Create a dummy array for the colorbar to latch onto
sm.set_array([])

# ! Iterate over the years and create a subplot for each -----------------------
for i, year in enumerate(unique_years):
    ax = fig.add_subplot(gs[i // n_cols, i % n_cols])

    # Filter the data for the year and plot
    data_for_year = gdf[gdf["year"] == year]
    data_for_year.plot(
        column=fig_dic["var"],
        edgecolor="face",
        linewidth=0.5,
        ax=ax,
        cmap=cmap,
        norm=norm,
        missing_kwds={"color": "lightgrey", "edgecolor": "lightgrey", "linewidth": 0.5},
    )

    # Add contour of France
    # NOTE: sp_france must already exist; the line loading it is commented out above
    sp_france.plot(ax=ax, color="none", edgecolor="black", linewidth=0.5)

    # Remove axis
    ax.set_axis_off()

    # Add year as text below the map
    ax.text(0.5, 0, str(year), transform=ax.transAxes, ha="center", fontweight="bold")

# ! Add colorbar --------------------------------------------------------------
# Span both rows in the last column for the colorbar
cbar_ax = fig.add_subplot(gs[0:2, n_cols])
cbar = fig.colorbar(sm, cax=cbar_ax)
cbar.set_label(fig_dic["legend"])

# ! Finish up -----------------------------------------------------------------
# Adjust layout to accommodate the main title and subplots
plt.tight_layout(rect=[0, 0, 1, 1])

# Main title, placed slightly above the figure area
fig.suptitle(fig_dic["main"], fontsize=16, fontweight="bold", position=(0.5, 1.05))

# Show/save the figure
plt.show()

# plt.savefig("test.png", bbox_inches="tight", pad_inches=0.1, dpi=300)
plt.close()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np

# Stand-alone demo: draw a horizontal colorbar for an asymmetric data range in
# which `center` is shown in the neutral colour.

# Define the data range and the center for the diverging colormap
data_min, data_max = -25, 50  # Replace with your actual data range
center = 0

# The anchor position computed below is only valid for a center strictly
# inside the data range.
if not data_min < center < data_max:
    raise ValueError("center must lie strictly between data_min and data_max")

# Define the colors for the negative and positive sides, and the center
negative_color = "blue"
positive_color = "red"
center_color = "white"

# Relative position of `center` within [data_min, data_max]
total_range = data_max - data_min
prop_neg = (center - data_min) / total_range

# Colour anchors. The end points are written as literals because
# LinearSegmentedColormap requires the anchors to start at 0 and end at 1,
# and a float sum is not guaranteed to hit 1.0 exactly.
color_list = [
    (0.0, negative_color),
    (prop_neg, center_color),
    (1.0, positive_color),
]

# Create a LinearSegmentedColormap object
custom_cmap = mcolors.LinearSegmentedColormap.from_list("custom_cmap", color_list)

# Linear normalisation. The colormap already places the neutral colour at the
# relative position of `center`; combining it with TwoSlopeNorm (which maps
# `center` to 0.5) would shift the neutral colour away from `center`.
norm = mcolors.Normalize(vmin=data_min, vmax=data_max)

# Create a ScalarMappable and initialize a data array for the colorbar
sm = plt.cm.ScalarMappable(cmap=custom_cmap, norm=norm)
sm.set_array([])

# Plot the colorbar into its own axes. A ScalarMappable is not attached to any
# Axes, and recent Matplotlib versions refuse to guess where to draw the bar.
demo_fig, demo_cbar_ax = plt.subplots(figsize=(8, 2))
demo_fig.subplots_adjust(bottom=0.5)
cbar = demo_fig.colorbar(sm, cax=demo_cbar_ax, orientation="horizontal")
cbar.set_label("Change of Basal Area [%/yr]")
plt.show()

In [None]:
# Stand-alone demo: scatter plot coloured with a diverging colormap whose
# neutral colour sits at `data_center`, plus a matching colorbar.
# Cell-local import so the cell does not depend on an earlier cell having
# imported the module under this alias.
import matplotlib.colors as mcolors

# Define the data range for the diverging colormap and the value that should
# be drawn in the neutral colour
data_min, data_max = -20, 100  # Replace with the actual range of xxx
data_center = 0

# Define the colors for the negative and positive sides
negative_color = "blue"
positive_color = "red"

# Symmetric colour anchors: the neutral colour sits in the middle of the
# colormap. (Anchors must lie in [0, 1] and be increasing.)
color_list = [
    (0, negative_color),
    (0.5, "white"),
    (1, positive_color),
]

# Create a LinearSegmentedColormap object
custom_cmap = mcolors.LinearSegmentedColormap.from_list("custom_cmap", color_list)

# TwoSlopeNorm maps data_min -> 0, data_center -> 0.5 and data_max -> 1, so
# together with the symmetric colormap the neutral colour lands on data_center
# even though the range is asymmetric.
norm = mcolors.TwoSlopeNorm(vmin=data_min, vcenter=data_center, vmax=data_max)
sm = plt.cm.ScalarMappable(cmap=custom_cmap, norm=norm)
sm.set_array([])

# Generate some reproducible sample data
rng = np.random.default_rng(42)
y = rng.standard_normal(100)
xxx = np.sort(rng.uniform(data_min, data_max, 100))  # ascending order

# Plot the scatter plot with divergent colors. The points and the colorbar
# share the same colormap and norm, so the bar describes the plotted colours.
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(xxx, y, c=xxx, cmap=custom_cmap, norm=norm, edgecolors="black")

# Plot the colorbar next to the scatter axes
cbar = scatter_fig.colorbar(sm, ax=scatter_ax)
cbar.set_label("xxx")

# Show the plot
plt.show()

### Aggregated Method


---


## Spatial Patterns

- There are two approaches:

  - Direct: Aggregate trees to given area and calculate change _directly_ at this level
    - To assess the uncertainty per area and year, this approach requires bootstrapping
  - Aggregated: Aggregate trees to site and take the mean over the given area
    - To assess the uncertainty per area and year, this approach simply reports the mean and sd across sites

- Plus, we are interested in all species and in species subsets.


### Direct Approach


In [None]:
# Direct approach: for every species x height-class x region combination,
# group the trees by region (and campaign year) and run
# `calculate_growth_mortality` on each group via `run_mp`. One feather file is
# written per combination; existing files are skipped unless
# `force_rerunning_all_sites` is set.

# Imports (`os` is needed for the directory and cache checks below)
import os

from utilities import calculate_growth_mortality
from run_mp import run_mp

# Get input df. The factorial setup below is derived from the same frame that
# is looped over, so species/height lists always match the data.
df_in = nfi_raw.copy()

# Get factorial setup
all_regions = ["idp", "reg", "dep", "gre", "ser", "hex"]

# Ten most frequent genera, with the pseudo-species "all" (no filter) in front
all_species = (
    df_in["genus_lat"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
    .index.tolist()
)
all_species = ["all"] + all_species

# Make sure poplars are in there
if "Populus" not in all_species:
    all_species.append("Populus")

# All height classes except "Missing" (filtered instead of list.remove so a
# data set without that class does not raise), plus "all" (no filter)
all_heights = ["all"] + [
    height
    for height in df_in["tree_height_class"].value_counts().index.tolist()
    if height != "Missing"
]

# Output directory is the same for every combination, so create it once
my_dir = here("data/tmp/nfi/growth_and_mortality_data/direct_approach")
os.makedirs(my_dir, exist_ok=True)

# Loop over species
for my_species in all_species:
    # Loop over heights
    for my_height in all_heights:
        # Boolean masks return new frames, so `df_in` is never modified and no
        # per-iteration copy of the full data set is needed
        df_loop = df_in

        # Filter for species
        if my_species != "all":
            df_loop = df_loop[df_loop["genus_lat"] == my_species]

        # Filter for height
        if my_height != "all":
            df_loop = df_loop[df_loop["tree_height_class"] == my_height]

        # Nothing to group for this combination; combining zero results with
        # pd.concat would fail
        if df_loop.empty:
            print(
                f" - No trees for species={my_species}, height={my_height}, skipping it"
            )
            continue

        # Loop over regions
        for my_region in all_regions:
            # Get filename. The height class is part of the name; without it
            # all height classes of a species/region map to the same file, so
            # every class after the first is skipped by the cache check (or
            # overwrites the file on a forced rerun).
            my_file = str.lower(
                f"species_{my_species}-height_{my_height}-area_{my_region}"
            )
            my_dirfile = f"{my_dir}/{my_file}.feather"

            # Check if file already exists, if so skip current loop
            if os.path.isfile(my_dirfile) and not force_rerunning_all_sites:
                print(f" - File already exists: {my_file}, skipping it")
                continue

            # Verbose
            print(str.lower(f"\n - Working on: {my_file}"))

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this time information
            if my_region == "idp":
                group_col = my_region
                df_region = df_loop
            else:
                group_col = my_region + "_year"
                # assign() returns a new frame, so neither df_loop nor df_in
                # picks up the helper column
                df_region = df_loop.assign(
                    **{
                        group_col: df_loop[my_region].astype(str)
                        + "_"
                        + df_loop["campagne_1"].astype(str)
                    }
                )

            # Create list and run mp
            df_list = [
                group for _, group in df_region.groupby(group_col, as_index=False)
            ]
            df_mp = run_mp(
                calculate_growth_mortality,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                min_trees_per_plot=1,
                grouping_variable=group_col,
            )

            # Save df
            df_mp.reset_index(drop=True).to_feather(my_dirfile)

### Correct Approach


In [None]:
# Load the idp-level data for each species and calculate the mean and sd over desired region

## Temporal Patterns

- For the temporal patterns, we can again use either the holistic approach or the mean approach.
- For now, to save time, I am only doing the mean approach.


### Mean-Approach

- For this, I need factorial species-height subsets but only at the idp level


In [None]:
# Mean approach: for every species x height-class combination, group the trees
# by region and run `calculate_growth_mortality` on each group via `run_mp`.
# One feather file is written per combination.

# Imports (`os` is needed to create the output directory)
import os

from utilities import calculate_growth_mortality
from run_mp import run_mp

# Calculate metrics of change at region-level
all_regions = ["idp"]
all_species = ["all"] + df_merged["genus_lat"].value_counts().head(10).index.tolist()

# All height classes except "Missing" (filtered instead of list.remove so a
# data set without that class does not raise), plus "all" (no filter)
all_heights = ["all"] + [
    height
    for height in df_merged["tree_height_class"].value_counts().index.tolist()
    if height != "Missing"
]

df_in = df_merged.copy()
# NOTE(review): this flag is not read anywhere in this cell - confirm whether
# it is still needed.
group_per_height = True

# Output directory is the same for every combination, so create it once
my_dir = here("data/tmp/dfs_metrics_of_change/area-level/")
os.makedirs(my_dir, exist_ok=True)

for my_species in all_species:
    for my_height in all_heights:
        # Boolean masks return new frames, so `df_in` is never modified and no
        # per-iteration copy of the full data set is needed
        df_loop = df_in
        # Filter for species
        if my_species != "all":
            df_loop = df_loop[df_loop["genus_lat"] == my_species]
        # Filter for height
        if my_height != "all":
            df_loop = df_loop[df_loop["tree_height_class"] == my_height]

        # Nothing to group for this combination; combining zero results with
        # pd.concat would fail
        if df_loop.empty:
            print(
                f" - No trees for species={my_species}, height={my_height}, skipping it"
            )
            continue

        for my_region in all_regions:
            my_file = str.lower(
                f"species_{my_species}__tree_heights_{my_height}__area_{my_region}"
            )

            print(str.lower(f"\n - Working on: {my_file}"))

            # IDP holds year information, so no need to add there
            # But grouping for other regions need this information
            if my_region == "idp":
                group_col = my_region
                df_region = df_loop
            else:
                group_col = my_region + "_year"
                # assign() returns a new frame, so neither df_loop nor df_in
                # picks up the helper column
                df_region = df_loop.assign(
                    **{
                        group_col: df_loop[my_region].astype(str)
                        + "_"
                        + df_loop["campagne_1"].astype(str)
                    }
                )

            # Create list and run mp
            df_list = [
                group for _, group in df_region.groupby(group_col, as_index=False)
            ]
            df_mp = run_mp(
                calculate_growth_mortality,
                df_list,
                combine_func=pd.concat,
                progress_bar=True,
                num_cores=10,
                min_trees_per_plot=1,
                grouping_variable=group_col,
            )

            # Save df
            df_mp.reset_index(drop=True).to_feather(f"{my_dir}/{my_file}.feather")

---

## Load Data


### Maps


In [None]:
# Read the three hexagon map layers from the project's qgis folder.
# `here` resolves each path relative to the project root.
shp_hex10, shp_hex12, shp_hex25 = (
    gpd.read_file(here(geojson_path))
    for geojson_path in (
        "qgis/hexmaps/hexmap_10000.geojson",
        "qgis/hexmaps/hexmap_12500.geojson",
        "qgis/hexmaps/hexmap_25000.geojson",
    )
)

### NFI Data


In [None]:
# Read the analysis-ready NFI table (path is relative to the working directory)
data = pd.read_feather("nfi_ready_for_spatial_temporal_analysis.feather")

# Bring the identifier, time, and location columns to the front. Each column is
# re-inserted at position 0, so the last one listed ends up first:
# idp, campagne_1, lon, lat, gre, ser, followed by the remaining columns.
for column_name in ("ser", "gre", "lat", "lon", "campagne_1", "idp"):
    data.insert(0, column_name, data.pop(column_name))

data.head()

In [None]:
# Lookup table with one row per unique (genus_lat, espar_red) pair in `data`
dict_species = data.loc[:, ["genus_lat", "espar_red"]].drop_duplicates(
    ignore_index=True
)

# Show the ten espar_red codes that appear in the most pairs (ascending counts)
dict_species["espar_red"].value_counts(ascending=True).tail(10)

In [None]:
# Rows of the lookup table whose espar_red code is the string '68'
dict_species[dict_species["espar_red"] == "68"]

In [None]:
# Rows of the lookup table that belong to the genus 'Abies'
dict_species[dict_species["genus_lat"] == "Abies"]

### Metric of Change


In [None]:
# Calculate on site-level for aggregation to larger region later on.
# The frame is split by "idp" and each sub-frame is handed to `run_mp` together
# with `calculate_growth_mortality`; the results are combined with pd.concat.
df_list = data.groupby("idp", as_index=False)
df_list = [group for name, group in df_list]
df_site = run_mp(
    calculate_growth_mortality,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    divide_by_nplots=True,
    # Must name the column used in the groupby above; passed as a literal like
    # in the region-level cells so the cell does not rely on a leftover
    # `my_group` variable from the kernel.
    grouping_variable="idp",
)

In [None]:
# Regional Level
# Split `data` into one sub-frame per value of "reg" and hand the list to
# `run_mp`, which applies `calculate_growth_mortality` and combines the
# results with pd.concat.
df_list = [subset for _, subset in data.groupby("reg", as_index=False)]
df_reg = run_mp(
    calculate_growth_mortality,
    df_list,
    grouping_variable="reg",
    divide_by_nplots=True,
    num_cores=10,
    progress_bar=True,
    combine_func=pd.concat,
)

In [None]:
# Department Level
# Split `data` into one sub-frame per value of "dep" and hand the list to
# `run_mp`, which applies `calculate_growth_mortality` and combines the
# results with pd.concat.
df_list = [subset for _, subset in data.groupby("dep", as_index=False)]
df_dep = run_mp(
    calculate_growth_mortality,
    df_list,
    grouping_variable="dep",
    divide_by_nplots=True,
    num_cores=10,
    progress_bar=True,
    combine_func=pd.concat,
)

In [None]:
# Sylvoregion Level
# Split `data` into one sub-frame per value of "ser" and hand the list to
# `run_mp`, which applies `calculate_growth_mortality` and combines the
# results with pd.concat.
df_list = [subset for _, subset in data.groupby("ser", as_index=False)]
df_ser = run_mp(
    calculate_growth_mortality,
    df_list,
    grouping_variable="ser",
    divide_by_nplots=True,
    num_cores=10,
    progress_bar=True,
    combine_func=pd.concat,
)

In [None]:
# Greco Level
# Split `data` into one sub-frame per value of "gre" and hand the list to
# `run_mp`, which applies `calculate_growth_mortality` and combines the
# results with pd.concat.
df_list = [subset for _, subset in data.groupby("gre", as_index=False)]
df_gre = run_mp(
    calculate_growth_mortality,
    df_list,
    grouping_variable="gre",
    divide_by_nplots=True,
    num_cores=10,
    progress_bar=True,
    combine_func=pd.concat,
)

## Calculate Metrics of Change

**Target:** Mean of Metric within Region

Routine:

- Filter for tree subset
- For each site, calculate metrics of change
- Group sites by given variable (ser, gre, dep, reg, hex)
- Calculate the mean and sd for that group
- Plot mean and sd for that group

**Alternative:** Calculate total change per region (aggregate all sites to one instead of taking the mean across sites).

**Maps:**

- Plot the difference relative to the 2010 campaigns!


### All species


### Top 10 Species


---
