# Calculate biomass from Living trees and saplings

# Imports and Set-up

In [1]:
# Standard Imports
import sys
import os
import pandas as pd
import numpy as np

# Google Cloud Imports
import pandas_gbq

In [2]:
# Util imports
sys.path.append("../../")  # include parent directory
from src.settings import (
    DATA_DIR,
    GCP_PROJ_ID,
    CARBON_POOLS_OUTDIR,
    CARBON_STOCK_OUTDIR,
    SPECIES_LOOKUP_CSV,
    PC_PLOT_LOOKUP_CSV,
)

from src.biomass_equations import (
    calculate_tree_height,
    allometric_tropical_tree,
    allometric_peatland_tree,
    vmd0001_eq1,
)

In [3]:
# Variables
# Local CSV paths inside CARBON_POOLS_OUTDIR. The loaders below read these
# files when they exist and otherwise create them from BigQuery.
TREES_CSV = CARBON_POOLS_OUTDIR / "trees.csv"
TREES_WD_CSV = CARBON_POOLS_OUTDIR / "trees_with_wood_density.csv"
SAPLING_CSV = CARBON_POOLS_OUTDIR / "saplings_ntv_litter.csv"
PLOT_INFO_CSV = CARBON_POOLS_OUTDIR / "plot_info.csv"

# BigQuery Variables
# DATASET_ID is interpolated into the table paths of the queries below.
DATASET_ID = "carbon_stock"
# NOTE(review): IF_EXISTS is not used in the visible cells — presumably meant
# for a later BigQuery upload; confirm.
IF_EXISTS = "replace"

# Processing Conditions
# How trees with DBH >= 150 are handled:
#   "get_ave"       -> replace the DBH with the mean DBH of the same unique_id
#   "drop_outliers" -> remove those rows
#   "eq_150"        -> cap the DBH at 150
OUTLIER_REMOVAL = "get_ave"  # Options: "get_ave", "drop_outliers", "eq_150"

## Load data

### Plot Data

In [4]:
# Query BigQuery only when no local copy exists yet, and save the result so
# that later runs read the CSV instead.
if not PLOT_INFO_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{DATASET_ID}.plot_info"""

    plot_info = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    plot_info.to_csv(PLOT_INFO_CSV, index=False)
else:
    plot_info = pd.read_csv(PLOT_INFO_CSV)

In [5]:
plot_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   unique_id               674 non-null    object 
 1   data_recorder           674 non-null    object 
 2   team_no                 674 non-null    int64  
 3   plot_code_nmbr          674 non-null    int64  
 4   plot_type               674 non-null    object 
 5   sub_plot                674 non-null    object 
 6   yes_no                  674 non-null    object 
 7   sub_plot_shift          634 non-null    object 
 8   GPS_waypt               634 non-null    float64
 9   GPS_id                  634 non-null    float64
 10  GPS                     577 non-null    object 
 11  GPS_latitude            577 non-null    float64
 12  GPS_longitude           577 non-null    float64
 13  GPS_altitude            577 non-null    float64
 14  GPS_precision           577 non-null    fl

### Trees data

In [6]:
# Query BigQuery only when no local copy exists yet, and save the result so
# that later runs read the CSV instead.
if not TREES_CSV.exists():
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{DATASET_ID}.trees"""

    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)
else:
    trees = pd.read_csv(TREES_CSV)

In [7]:
# Use the same key names as the species lookup table (code_species,
# code_family) so the two tables can be merged on them.
column_renames = {"species_name": "code_species", "family_name": "code_family"}
trees = trees.rename(columns=column_renames)

In [8]:
# Blank out species code 999 so it is treated as missing.
# NOTE(review): 999 is presumably the "unknown" placeholder code — confirm.
has_placeholder_code = trees["code_species"] == 999
trees.loc[has_placeholder_code, "code_species"] = np.nan

In [9]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   unique_id     6579 non-null   object 
 1   nest          6579 non-null   int64  
 2   code_species  4993 non-null   float64
 3   code_family   1330 non-null   float64
 4   DBH           6579 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 257.1+ KB


In [10]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH
0,308D2,2,,25.0,10.8
1,308D2,2,,25.0,17.3


### Saplings data

In [11]:
# Query BigQuery only when no local copy exists yet, and save the result so
# that later runs read the CSV instead.
if not SAPLING_CSV.exists():
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{DATASET_ID}.saplings_ntv_litter"""

    saplings = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    saplings.to_csv(SAPLING_CSV, index=False)
else:
    saplings = pd.read_csv(SAPLING_CSV)

In [12]:
saplings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   unique_id             674 non-null    object 
 1   count_saplings        589 non-null    float64
 2   litter_bag_weight     620 non-null    float64
 3   litter_sample_weight  620 non-null    float64
 4   ntv_bag_weight        620 non-null    float64
 5   ntv_sample_weight     620 non-null    float64
dtypes: float64(5), object(1)
memory usage: 31.7+ KB


### Tree species

In [13]:
# Species lookup table; the path constant is imported from src.settings.
species = pd.read_csv(SPECIES_LOOKUP_CSV)

In [14]:
species.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375 entries, 0 to 374
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   code_family      74 non-null     float64
 1   family           74 non-null     object 
 2   scientific_name  375 non-null    object 
 3   local_name       375 non-null    object 
 4   code_species     375 non-null    int64  
 5   corrected_genus  375 non-null    object 
 6   wood_density     375 non-null    float64
dtypes: float64(2), int64(1), object(4)
memory usage: 20.6+ KB


In [15]:
species.head(2)

Unnamed: 0,code_family,family,scientific_name,local_name,code_species,corrected_genus,wood_density
0,999.0,Unknown,Litchi chinensis,Alupag - amo,193,Litchi,0.608902
1,1.0,Alangiaceae,Alangium javanicum,Putian,15,Alangium,0.608902


### Plot lookup

In [16]:
# Plot lookup table (used below to attach "Strata" to trees by unique_id);
# the path constant is imported from src.settings.
plot_strata = pd.read_csv(PC_PLOT_LOOKUP_CSV)

In [17]:
plot_strata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1760 entries, 0 to 1759
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Strata     1760 non-null   int64 
 1   unique_id  1760 non-null   object
 2   LCC        1760 non-null   object
 3   Bgy_Name   1760 non-null   object
 4   Mun_Name   1760 non-null   object
 5   Pro_Name   1760 non-null   object
dtypes: int64(1), object(5)
memory usage: 82.6+ KB


# Calculate tree biomass

## Remove outliers


In [18]:
# DBH readings of 150 or more are treated as outliers and handled according
# to OUTLIER_REMOVAL.
if OUTLIER_REMOVAL == "get_ave":
    # Replace each outlier with the mean DBH of its plot (unique_id).
    # NOTE(review): the plot mean is taken before replacement, so it still
    # includes the outlier readings themselves — confirm this is intended.
    mean_dbh = pd.DataFrame(trees.groupby("unique_id")["DBH"].mean()).reset_index()
    is_outlier = trees["DBH"] >= 150
    trees.loc[is_outlier, "DBH"] = trees.loc[is_outlier, "unique_id"].map(
        mean_dbh.set_index("unique_id")["DBH"]
    )
elif OUTLIER_REMOVAL == "drop_outliers":
    # Renumber the rows after dropping. The name lookups further down assign
    # columns taken from merge results, which carry a fresh 0..n-1 index;
    # with gaps left in this index those assignments would attach names to
    # the wrong trees.
    trees = trees[trees["DBH"] < 150].reset_index(drop=True)
elif OUTLIER_REMOVAL == "eq_150":
    # Cap outliers at the threshold value.
    trees.loc[trees["DBH"] >= 150, "DBH"] = 150
else:
    # Fail loudly instead of silently skipping outlier handling on a typo.
    raise ValueError(
        f"Invalid OUTLIER_REMOVAL value {OUTLIER_REMOVAL!r}. "
        'Please choose one of "get_ave", "drop_outliers" or "eq_150".'
    )

## Add species using lookup table

In [19]:
# Left-join the species lookup onto the trees by species code; trees whose
# code has no match keep their row and get NaN in the lookup columns.
species_trees = trees.merge(species, on="code_species", how="left")

In [20]:
# Copy the looked-up scientific name and family onto trees. Both columns come
# from the merged table and are matched to trees by row index.
trees["scientific_name"], trees["family_name"] = (
    species_trees["scientific_name"],
    species_trees["family"],
)

In [21]:
# Distinct (code_family, family) pairs from the species lookup, used as a
# family-code -> family-name table.
species_family = species[["code_family", "family"]].drop_duplicates()

In [22]:
# Left-join the family names onto the trees by family code.
family_trees = trees.merge(species_family, on="code_family", how="left")

In [23]:
# For trees that carry a family code, overwrite family_name with the family
# looked up from that code. The right-hand side comes from the merged table
# and is matched to trees by row index.
trees.loc[(trees.code_family.notna()), "family_name"] = family_trees.loc[
    (family_trees.code_family.notna()), "family"
]

In [24]:
# Trees left without a scientific or family name are labelled "Unknown".
unknown_labels = {"scientific_name": "Unknown", "family_name": "Unknown"}
trees = trees.fillna(unknown_labels)

In [25]:
trees[(trees["scientific_name"] == "Unknown") & (trees["family_name"] == "Unknown")]

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name
4,308A2,2,,,18.70,Unknown,Unknown
5,308A2,2,,,20.20,Unknown,Unknown
6,308A2,2,,,14.70,Unknown,Unknown
7,308A2,2,,,12.00,Unknown,Unknown
32,22C2,2,,,20.40,Unknown,Unknown
...,...,...,...,...,...,...,...
6466,399C2,4,,,84.90,Unknown,Unknown
6470,399A2,4,,,62.00,Unknown,Unknown
6472,203D2,4,,,69.55,Unknown,Unknown
6530,80B2,4,,,58.30,Unknown,Unknown


In [26]:
# Save the trees with their resolved names (row index not written).
# NOTE(review): presumably the input to the external wood-density step — confirm.
trees.to_csv(CARBON_POOLS_OUTDIR / "trees_with_names.csv", index=False)

## Get genus and wood density using BIOMASS R library

Wood density was generated using the [BIOMASS](https://www.rdocumentation.org/packages/BIOMASS/versions/2.1.11) library from R. For further information, see the package documentation linked above.

[To do]: add the R script that derives wood density and genus with BIOMASS, and run it from this notebook.

In [27]:
# Reload the trees table, now with the corrected_genus and meanWD columns.
# The path is the TREES_WD_CSV constant defined with the other file paths, so
# the location is maintained in one place. No visible cell writes this file;
# it is presumably produced by the external R/BIOMASS step — confirm.
trees = pd.read_csv(TREES_WD_CSV)

In [28]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,meanWD
0,308D2,2,,25.0,10.8,Unknown,Fabaceae,Unknown,0.702417
1,308D2,2,,25.0,17.3,Unknown,Fabaceae,Unknown,0.702417


## Estimate tree height

In [29]:
# Estimate tree height from the "DBH" column with the project helper; the
# displayed result shows a new "height" column on the returned table.
trees = calculate_tree_height(trees, "DBH")

In [30]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,meanWD,height
0,308D2,2,,25.0,10.8,Unknown,Fabaceae,Unknown,0.702417,13.05612
1,308D2,2,,25.0,17.3,Unknown,Fabaceae,Unknown,0.702417,16.968661


## Add strata to trees


In [31]:
# Attach the stratum of each tree's plot; trees whose unique_id is not in the
# lookup keep their row with a missing Strata.
strata_by_plot = plot_strata[["unique_id", "Strata"]]
trees = trees.merge(strata_by_plot, how="left", on="unique_id")

In [32]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,meanWD,height,Strata
0,308D2,2,,25.0,10.8,Unknown,Fabaceae,Unknown,0.702417,13.05612,2
1,308D2,2,,25.0,17.3,Unknown,Fabaceae,Unknown,0.702417,16.968661,2


## Calculate biomass and carbon stock for tree AGB 

In [33]:
# Trees in strata 1-3 go through the tropical-tree allometric equation.
in_tropical_strata = trees["Strata"].isin([1, 2, 3])
tropical_trees = trees[in_tropical_strata].copy()

In [34]:
# Apply the tropical-tree allometric helper using the wood density, DBH and
# height columns.
tropical_trees = allometric_tropical_tree(tropical_trees, "meanWD", "DBH", "height")

In [35]:
# Trees in strata 4-6 go through the peatland-tree allometric equation.
in_peatland_strata = trees["Strata"].isin([4, 5, 6])
peatland_trees = trees[in_peatland_strata].copy()

In [36]:
# Apply the peatland-tree allometric helper, which is given the DBH column only.
peatland_trees = allometric_peatland_tree(peatland_trees, "DBH")

In [37]:
# Recombine the two subsets. Trees whose Strata is missing or outside 1-6 are
# in neither subset, so they are absent from the result.
trees = pd.concat([tropical_trees, peatland_trees])

In [38]:
# Convert biomass to carbon stock with the project helper. The displayed
# output is consistent with aboveground_carbon_stock = 0.47 * aboveground_biomass.
trees = vmd0001_eq1(trees, 0.47)

In [39]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,meanWD,height,Strata,aboveground_biomass,aboveground_carbon_stock
0,308D2,2,,25.0,10.8,Unknown,Fabaceae,Unknown,0.702417,13.05612,2,608.932762,286.198398
1,308D2,2,,25.0,17.3,Unknown,Fabaceae,Unknown,0.702417,16.968661,2,1972.847878,927.238503


## Calculate below ground biomass

In [40]:
def vmd0001_eq5(
    df: pd.DataFrame,
    carbon_stock_col: str = "aboveground_carbon_stock",
    eco_zone: str = "tropical_rainforest",
):
    """Add a ``below_ground_carbon_stock`` column derived from above-ground stock.

    Each value in ``carbon_stock_col`` is multiplied by one of two ratios,
    chosen by comparing the value with a threshold that depends on
    ``eco_zone``.

    Args:
        df: Table holding ``carbon_stock_col``. It is modified in place.
        carbon_stock_col: Name of the column with the above-ground values.
        eco_zone: ``"tropical_rainforest"``, ``"subtropical_humid"`` or
            ``"subtropical_dry"``.

    Returns:
        The same ``df`` object, with ``below_ground_carbon_stock`` added.

    Raises:
        ValueError: If ``eco_zone`` is not one of the supported zones.
    """
    # (zones sharing a rule, (threshold, ratio below it, ratio otherwise))
    zone_rules = (
        (("tropical_rainforest", "subtropical_humid"), (125, 0.20, 0.24)),
        (("subtropical_dry",), (20, 0.56, 0.28)),
    )

    for zones, rule in zone_rules:
        if eco_zone in zones:
            threshold, ratio_below, ratio_otherwise = rule
            break
    else:
        raise ValueError(
            "Invalid eco_zone value. Please choose a valid eco_zone or add root-to-shoot ratio for the desired ecological zone."
        )

    above_ground = df[carbon_stock_col]
    df["below_ground_carbon_stock"] = np.where(
        above_ground < threshold,
        above_ground * ratio_below,
        above_ground * ratio_otherwise,
    )
    return df

In [42]:
# Add below_ground_carbon_stock to the trees using the default column
# ("aboveground_carbon_stock") and eco zone ("tropical_rainforest").
trees = vmd0001_eq5(trees)

# Calculate sapling biomass

In [61]:
# Run the same project helper used for trees, this time in its sapling mode.
saplings = vmd0001_eq1(saplings, is_sapling=True)

In [58]:
# Calculate corrected radius for sapling nest based on slope (in radians)
# NOTE(review): the 2 is presumably the nominal sapling-nest radius in
# metres — confirm against the field protocol.
corrected_radius = 2 / np.cos(plot_info["slope_radians"])

In [59]:
# Calculate new total subplot area based on corrected radius.
# Area of a circle is pi * r ** 2; multiplying the radius by 2 instead of
# squaring it does not give an area in m2.
plot_info["corrected_sapling_area_m2"] = np.pi * corrected_radius**2