# Drought / Shade / Waterlogging Tolerances following Niinemets & Valladares (2006)


In [None]:
import sys

# Put the project's source folder first on the import path so the local `imports`
# module can be found (the relative path is resolved against the working directory).
sys.path.insert(0, "../../src")
# NOTE(review): the star import presumably provides the names used below that are
# not imported in this notebook (pd, plt, chime, get_final_nfi_data_for_analysis,
# init_notebook) -- explicit imports would make these dependencies visible.
from imports import *

# Project-specific notebook initialisation (defined outside this file).
init_notebook()

In [None]:
# Species for which the final model runs completed successfully.
# Alphabetical order, one genus per row.
all_species = [
    "Abies alba",
    "Acer campestre", "Acer pseudoplatanus",
    "Alnus glutinosa", "Alnus incana",
    "Arbutus unedo",
    "Betula pendula", "Betula pubescens",
    "Buxus sempervirens",
    "Carpinus betulus",
    "Castanea sativa",
    "Corylus avellana",
    "Crataegus monogyna",
    "Fagus sylvatica",
    "Frangula alnus",
    "Fraxinus excelsior",
    "Juniperus communis",
    "Laburnum anagyroides",
    "Larix decidua",
    "Malus sylvestris",
    "Picea abies", "Picea sitchensis",
    "Pinus halepensis", "Pinus mugo", "Pinus nigra",
    "Pinus pinaster", "Pinus sylvestris",
    "Populus", "Populus nigra", "Populus tremula",
    "Prunus avium", "Prunus spinosa",
    "Pseudotsuga menziesii",
    "Quercus ilex", "Quercus petraea", "Quercus pubescens",
    "Quercus pyrenaica", "Quercus robur",
    "Robinia pseudoacacia",
    "Salix alba", "Salix caprea", "Salix cinerea",
    "Sambucus nigra",
    "Sorbus aria", "Sorbus aucuparia", "Sorbus torminalis",
    "Tilia cordata",
    "Ulmus minor",
]

In [None]:
# Load the final NFI dataset used for the analysis
nfi = get_final_nfi_data_for_analysis()

# Distinct (species, tree class) pairs, restricted to the species in all_species
nfi_clean = nfi.loc[
    nfi["species_lat2"].isin(all_species), ["species_lat2", "tree_class"]
].drop_duplicates()

## NFI Data


In [None]:
# Attach tree count and mean height per species

# Rows whose state transition starts from "alive" (alive_alive / alive_dead).
# NOTE(review): this loads the NFI data a second time; `nfi` from the cell above
# could presumably be filtered instead -- confirm `verbose` only affects logging.
df_htot = get_final_nfi_data_for_analysis(verbose=False).query(
    "tree_state_change in ['alive_alive', 'alive_dead']"
)

# Tree count per species. The index and the value column are named explicitly so
# the resulting columns ("species_lat2", "count") do not depend on the pandas
# version (value_counts() changed how it names its result in pandas 2.0).
species_counts = (
    df_htot["species_lat2"]
    .value_counts()
    .rename_axis("species_lat2")
    .reset_index(name="count")
)

# Mean and standard deviation of htot per species.
# NOTE(review): computed on all rows of `nfi`, whereas the counts above use the
# filtered `df_htot` -- confirm this difference is intended.
height_stats = (
    nfi.groupby("species_lat2")["htot"]
    .agg(htot_mean="mean", htot_std="std")
    .reset_index()
)

# Combine, keep only the relevant species, tallest species first
species_stats = (
    species_counts.merge(height_stats, how="left", on="species_lat2")
    .query("species_lat2 in @all_species")
    .sort_values("htot_mean", ascending=False)
)

# Attach count and mean height (the std is only needed for the plot below).
# Columns left over from a previous run of this cell are dropped first, so that
# re-running it does not create suffixed duplicates (count_x, count_y, ...).
nfi_clean = nfi_clean.drop(columns=["count", "htot_mean"], errors="ignore").merge(
    species_stats.drop(columns="htot_std"), how="left", on="species_lat2"
)

# Plot height with error bars (horizontal bars span one standard deviation)
fig, ax = plt.subplots(figsize=(10, 10))
ax.errorbar(
    species_stats["htot_mean"],
    species_stats["species_lat2"],
    xerr=species_stats["htot_std"],
    fmt="o",
    color="black",
    ecolor="lightgray",
    elinewidth=3,
    capsize=0,
)
ax.set_xlabel("Mean height (m)")
ax.set_ylabel("Species")
ax.set_title("Mean tree height per species (error bars: ±1 SD)")
plt.show()

In [None]:
# Attach succession class
# The classes are stored as an explicit species -> class mapping so that every
# label sits next to the species it belongs to; with two parallel lists, a single
# inserted or removed entry would silently shift all following labels.
succession_by_species = {
    "Abies alba": "Late",
    "Acer campestre": "Mid",
    "Acer pseudoplatanus": "Mid",
    "Alnus glutinosa": "Early",
    "Alnus incana": "Early",
    "Arbutus unedo": "Late",
    "Betula pendula": "Early",
    "Betula pubescens": "Early",
    "Buxus sempervirens": "Late",
    "Carpinus betulus": "Late",
    "Castanea sativa": "Mid",
    "Corylus avellana": "Early",
    "Crataegus monogyna": "Mid",
    "Fagus sylvatica": "Late",
    "Frangula alnus": "Early",
    "Fraxinus excelsior": "Mid",
    "Juniperus communis": "Early",
    "Laburnum anagyroides": "Mid",
    "Larix decidua": "Early",
    "Malus sylvestris": "Early",
    "Picea abies": "Late",
    "Picea sitchensis": "Late",
    "Pinus halepensis": "Early",
    "Pinus mugo": "Mid",
    "Pinus nigra": "Early",
    "Pinus pinaster": "Early",
    "Pinus sylvestris": "Early",
    "Populus": "Early",
    "Populus nigra": "Early",
    "Populus tremula": "Early",
    "Prunus avium": "Mid",
    "Prunus spinosa": "Early",
    "Pseudotsuga menziesii": "Mid",
    "Quercus ilex": "Late",
    "Quercus petraea": "Late",
    "Quercus pubescens": "Late",
    "Quercus pyrenaica": "Late",
    "Quercus robur": "Late",
    "Robinia pseudoacacia": "Early",
    "Salix alba": "Early",
    "Salix caprea": "Early",
    "Salix cinerea": "Early",
    "Sambucus nigra": "Early",
    "Sorbus aria": "Early",
    "Sorbus aucuparia": "Early",
    "Sorbus torminalis": "Mid",
    "Tilia cordata": "Late",
    "Ulmus minor": "Mid",
}

# Column-oriented form used to build the DataFrame (dicts keep insertion order,
# so both lists stay aligned)
data = {
    "species_lat2": list(succession_by_species.keys()),
    "succession": list(succession_by_species.values()),
}

# Create the species -> succession lookup table
df = pd.DataFrame(data)

# Merge data. A "succession" column left over from a previous run of this cell is
# dropped first, so that re-running it does not create suffixed duplicates.
nfi_clean = pd.merge(
    nfi_clean.drop(columns="succession", errors="ignore"),
    df,
    on="species_lat2",
    how="left",
)

# Every species kept in nfi_clean must have received a succession class
missing_succession = nfi_clean.loc[
    nfi_clean["succession"].isna(), "species_lat2"
].unique()
if len(missing_succession) > 0:
    raise ValueError(f"No succession class defined for: {list(missing_succession)}")

nfi_clean

## Niinemets Data


### Cleaning


In [None]:
# Read the tolerance index table from the project's docs folder
tolerance_csv = "../../docs/niinements2006tolerance_SI-B_index-table.csv"
tolerances = pd.read_csv(tolerance_csv)
tolerances

In [None]:
# Clean dataset (work on a copy so the raw `tolerances` frame stays untouched)
tol = tolerances.copy()


def _strip_if_str(value):
    """Return `value` without surrounding whitespace if it is a string, else unchanged."""
    return value.strip() if isinstance(value, str) else value


# Remove surrounding whitespace from variable names and from all string cells.
# Series.map is applied column by column instead of DataFrame.applymap, which is
# deprecated since pandas 2.1.
tol.columns = tol.columns.str.strip()
tol = tol.apply(lambda column: column.map(_strip_if_str))

# Keep only the value in front of "±" in the tolerance columns
for col in tol.columns:
    if "tolerance" not in col:
        continue
    if pd.api.types.is_numeric_dtype(tol[col]):
        # Already numeric: there is nothing to split, and .str would raise here
        continue
    # Strip again, because the split can leave whitespace in front of the "±"
    tol[col] = tol[col].str.split("±").str[0].str.strip()

# Species names are kept exactly as given in the table.
# TODO: if a species name occurs more than once, decide which entry to keep (the
# original note suggested preferring the European one). Duplicates are visible in
# the counts displayed below.
tol["Species"].value_counts()

### Filtering


In [None]:
def _print_missing_species(expected, available):
    """Print one line for every name in `expected` that does not occur in `available`."""
    # Build the lookup once instead of converting the column for every species
    known = set(available)
    for name in expected:
        if name not in known:
            print(f"- {name} is missing")


# Check if all nfi species are in tolerance table
print("\nMissing species before renaming:")
_print_missing_species(all_species, tol["Species"])

# Rename species, if data is actually available
tol = tol.replace({"Species": {"Crataegus monogyna agg.": "Crataegus monogyna"}})

# Repeat check
print("\nMissing species after renaming:")
_print_missing_species(all_species, tol["Species"])

### Attaching


In [None]:
# Final tolerance table: the columns to keep, mapped to their new names.
# Any other column of `tol` is left out.
tolerance_columns = {
    "Species": "species_lat2",
    "Shade tolerance": "nii_shade",
    "Drought tolerance": "nii_drought",
    "Waterlogging tolerance": "nii_waterlog",
}

tol_clean = tol[list(tolerance_columns)].rename(columns=tolerance_columns)
tol_clean

In [None]:
# Merge: attach the tolerance indices to the per-species NFI table
df_merged = pd.merge(nfi_clean, tol_clean, on="species_lat2", how="left")

# Check if number of species remained the same
# (chime.error() is called before raising -- presumably an audible alert)
if len(df_merged) != len(all_species):
    chime.error()
    raise ValueError("Number of species in all_species and df_merged are different!")

# The row count alone can hide a missing species that is offset by a duplicated
# one (e.g. a species listed twice in the tolerance table), so also require that
# every species occurs exactly once.
duplicated_species = df_merged.loc[
    df_merged["species_lat2"].duplicated(), "species_lat2"
].unique()
if len(duplicated_species) > 0:
    chime.error()
    raise ValueError(
        f"Species occur more than once in df_merged: {list(duplicated_species)}"
    )

df_niinements_final = df_merged.copy()
df_niinements_final

## Final data


In [None]:
# Merge all together (rebuilt from nfi_clean, so this cell can be re-run on its own)
df_merged = pd.merge(nfi_clean, tol_clean, on="species_lat2", how="left")

# Fix dtypes and order the rows by species.
# sort_values() returns a new frame instead of sorting in place, so its result
# has to be assigned -- otherwise the table is written in unsorted order.
df_merged = df_merged.astype(
    {
        "tree_class": str,
        "nii_shade": float,
        "nii_drought": float,
        "nii_waterlog": float,
    }
).sort_values("species_lat2")

# Save to csv
# NOTE(review): "habita" in the file name looks like a typo for "habitat"; the
# name is kept unchanged because other code may read this exact path.
df_merged.to_csv(
    "../../data/final/tables/species_tolerance_and_habita_data.csv", index=False
)

In [None]:
# Deliberate stop marker: raising here ends a "Run All" at this point.
# NOTE(review): presumably meant to keep any cells below from being executed -- confirm.
raise Exception("End of Script")