# Calculate biomass from Living trees and saplings

# Imports and Set-up

In [2]:
# Standard Imports
import sys
import pandas as pd
import numpy as np

# Google Cloud Imports
import pandas_gbq

In [3]:
# Util imports
sys.path.append("../../")  # include parent directory
from src.settings import (
    GCP_PROJ_ID,
    CARBON_POOLS_OUTDIR,
    CARBON_STOCK_OUTDIR,
    SPECIES_LOOKUP_CSV,
    PC_PLOT_LOOKUP_CSV,
)

from src.biomass_equations import (
    calculate_tree_height,
    allometric_tropical_tree,
    allometric_peatland_tree,
    vmd0001_eq1,
    vmd0001_eq2,
    vmd0001_eq5,
)

In [4]:
# Variables
# Local CSV caches of the source BigQuery tables: each is read if it exists,
# otherwise the table is queried and the result is written to this path.
TREES_CSV = CARBON_POOLS_OUTDIR / "trees.csv"
SAPLING_CSV = CARBON_POOLS_OUTDIR / "saplings_ntv_litter.csv"
PLOT_INFO_CSV = CARBON_POOLS_OUTDIR / "plot_info.csv"
# Files exchanged with the wood-density R step: TREES_SPECIES_CSV is written by
# this notebook and TREES_WD_CSV is read back afterwards.
TREES_SPECIES_CSV = CARBON_POOLS_OUTDIR / "trees_with_names.csv"
TREES_WD_CSV = CARBON_POOLS_OUTDIR / "trees_with_wood_density.csv"

# BigQuery Variables
SRC_DATASET_ID = "biomass_inventory"  # dataset the raw tables are queried from
DATASET_ID = "carbon_stock"  # dataset the results are uploaded to
IF_EXISTS = "replace"  # forwarded as `if_exists` to pandas_gbq.to_gbq

# Processing Conditions
# Selects how trees with DBH >= 150 are handled in the "Remove outliers" cell.
OUTLIER_REMOVAL = "get_ave"  # Options: "get_ave", "drop_outliers", "eq_150"

## Load data

### Plot Data

In [5]:
# Pull plot_info from BigQuery only when no local cache exists, caching the
# result as CSV; otherwise load the cached CSV.
if not PLOT_INFO_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.plot_info"""

    plot_info = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    plot_info.to_csv(PLOT_INFO_CSV, index=False)
else:
    plot_info = pd.read_csv(PLOT_INFO_CSV)

In [6]:
# Summarise plot_info: column names, non-null counts and dtypes.
plot_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 31 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   unique_id                  674 non-null    object 
 1   data_recorder              674 non-null    object 
 2   team_no                    674 non-null    int64  
 3   plot_code_nmbr             674 non-null    int64  
 4   plot_type                  674 non-null    object 
 5   sub_plot                   674 non-null    object 
 6   yes_no                     674 non-null    object 
 7   sub_plot_shift             634 non-null    object 
 8   GPS_waypt                  634 non-null    float64
 9   GPS_id                     634 non-null    float64
 10  GPS                        577 non-null    object 
 11  GPS_latitude               577 non-null    float64
 12  GPS_longitude              577 non-null    float64
 13  GPS_altitude               577 non-null    float64

### Trees data

In [7]:
# Pull the trees table from BigQuery only when no local cache exists, caching
# the result as CSV; otherwise load the cached CSV.
if not TREES_CSV.exists():
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.trees"""

    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)
else:
    trees = pd.read_csv(TREES_CSV)

In [8]:
# The raw species/family columns hold numeric codes, so label them as codes.
# Reassigning (instead of inplace=True) keeps the cell chainable.
code_column_names = {"species_name": "code_species", "family_name": "code_family"}
trees = trees.rename(columns=code_column_names)

In [9]:
# Recode species code 999 as missing (NaN).
# NOTE(review): 999 is presumably the "unknown" placeholder - confirm against the field forms.
trees["code_species"] = trees["code_species"].mask(trees["code_species"].eq(999))

In [10]:
# Check dtypes and non-null counts after the column rename and the 999 -> NaN recode.
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   unique_id     6579 non-null   object 
 1   nest          6579 non-null   int64  
 2   code_species  4993 non-null   float64
 3   code_family   1330 non-null   float64
 4   DBH           6579 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 257.1+ KB


In [11]:
# Preview the first two tree records.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH
0,308D1,2,,25.0,10.8
1,308D1,2,,25.0,17.3


### Saplings data

In [12]:
# Pull the saplings_ntv_litter table from BigQuery only when no local cache
# exists, caching the result as CSV; otherwise load the cached CSV.
if not SAPLING_CSV.exists():
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.saplings_ntv_litter"""

    saplings = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    saplings.to_csv(SAPLING_CSV, index=False)
else:
    saplings = pd.read_csv(SAPLING_CSV)

In [13]:
# Summarise the saplings table: column names, non-null counts and dtypes.
saplings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   unique_id             674 non-null    object 
 1   count_saplings        589 non-null    float64
 2   litter_bag_weight     620 non-null    float64
 3   litter_sample_weight  620 non-null    float64
 4   ntv_bag_weight        620 non-null    float64
 5   ntv_sample_weight     620 non-null    float64
dtypes: float64(5), object(1)
memory usage: 31.7+ KB


### Tree species

In [14]:
# Load the species lookup table from the CSV path configured in src.settings.
species = pd.read_csv(SPECIES_LOOKUP_CSV)

In [15]:
# Summarise the species lookup: column names, non-null counts and dtypes.
species.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375 entries, 0 to 374
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   code_family      74 non-null     float64
 1   family           74 non-null     object 
 2   scientific_name  375 non-null    object 
 3   local_name       375 non-null    object 
 4   code_species     375 non-null    int64  
 5   corrected_genus  375 non-null    object 
 6   wood_density     375 non-null    float64
dtypes: float64(2), int64(1), object(4)
memory usage: 20.6+ KB


In [16]:
# Preview the first two rows of the species lookup.
species.head(2)

Unnamed: 0,code_family,family,scientific_name,local_name,code_species,corrected_genus,wood_density
0,999.0,Unknown,Litchi chinensis,Alupag - amo,193,Litchi,0.608902
1,1.0,Alangiaceae,Alangium javanicum,Putian,15,Alangium,0.608902


### Plot lookup

In [17]:
# Load the plot lookup (plot id -> Strata and location columns) from the CSV
# path configured in src.settings.
plot_strata = pd.read_csv(PC_PLOT_LOOKUP_CSV)

In [18]:
# Summarise the plot lookup: column names, non-null counts and dtypes.
plot_strata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1760 entries, 0 to 1759
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Strata     1760 non-null   int64 
 1   unique_id  1760 non-null   object
 2   LCC        1760 non-null   object
 3   Bgy_Name   1760 non-null   object
 4   Mun_Name   1760 non-null   object
 5   Pro_Name   1760 non-null   object
dtypes: int64(1), object(5)
memory usage: 82.6+ KB


# Calculate tree biomass

## Remove outliers


In [19]:
# Trees with DBH at or above this value are treated as outliers.
# NOTE(review): the unit is presumably centimetres - confirm against the field protocol.
DBH_OUTLIER_THRESHOLD = 150

is_dbh_outlier = trees["DBH"] >= DBH_OUTLIER_THRESHOLD

if OUTLIER_REMOVAL == "get_ave":
    # Replace each outlier with the mean DBH of its plot (unique_id).
    # The plot mean is taken before replacement, so it still includes the
    # outlier value(s) themselves.
    plot_mean_dbh = trees.groupby("unique_id")["DBH"].mean()
    trees.loc[is_dbh_outlier, "DBH"] = trees.loc[is_dbh_outlier, "unique_id"].map(
        plot_mean_dbh
    )
elif OUTLIER_REMOVAL == "drop_outliers":
    # Keep only rows strictly below the threshold.
    trees = trees[trees["DBH"] < DBH_OUTLIER_THRESHOLD].copy()
elif OUTLIER_REMOVAL == "eq_150":
    # Cap outliers at the threshold value.
    trees.loc[is_dbh_outlier, "DBH"] = DBH_OUTLIER_THRESHOLD
else:
    # Fail loudly: a mistyped option would otherwise leave outliers in place
    # without any indication.
    raise ValueError(
        f"Unknown OUTLIER_REMOVAL option: {OUTLIER_REMOVAL!r}. "
        'Expected "get_ave", "drop_outliers" or "eq_150".'
    )

## Add species using lookup table

In [20]:
# Mapping of species code -> scientific name built from the lookup table.
species_dict = species.set_index("code_species")["scientific_name"].to_dict()

In [21]:
# Translate species codes to scientific names.
# .map() is used rather than .replace(): a code that is absent from the lookup
# becomes NaN instead of being carried into the name column as a raw number.
trees["scientific_name"] = trees["code_species"].map(species_dict)

In [22]:
# Distinct (family code, family name) pairs taken from the species lookup.
family_columns = ["code_family", "family"]
species_family = species.loc[:, family_columns].drop_duplicates()

In [23]:
# Mapping of family code -> family name.
family_dict = species_family.set_index("code_family")["family"].to_dict()

In [24]:
# Translate family codes to family names.
# .map() is used rather than .replace(): a code that is absent from the lookup
# becomes NaN instead of being carried into the name column as a raw number.
trees["family_name"] = trees["code_family"].map(family_dict)

In [25]:
# Display trees with the newly added scientific_name and family_name columns.
trees

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name
0,308D1,2,,25.0,10.8,,Fabaceae
1,308D1,2,,25.0,17.3,,Fabaceae
2,308D1,2,,25.0,12.8,,Fabaceae
3,308D1,2,,25.0,28.1,,Fabaceae
4,308A1,2,,,18.7,,
...,...,...,...,...,...,...,...
6574,38C1,4,278.0,,50.1,Shorea astylosa,
6575,38C1,4,292.0,,80.8,Shorea polysperma,
6576,4B1,4,205.0,,81.7,Macaranga bicolor,
6577,4A1,4,289.0,,84.7,Shorea palosapis,


In [26]:
# Trees that have a scientific name but no family code recorded.
has_scientific_name = trees["scientific_name"].notna()
lacks_family_code = trees["code_family"].isna()
trees[has_scientific_name & lacks_family_code]

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name
10,308C1,2,39.0,,68.3,Artocarpus blancoi,
30,22B1,2,323.0,,14.1,Terminalia copelandi,
31,22B1,2,323.0,,12.6,Terminalia copelandi,
33,22C1,2,313.0,,26.0,Syzygium brevistylum,
34,22C1,2,313.0,,12.8,Syzygium brevistylum,
...,...,...,...,...,...,...,...
6574,38C1,4,278.0,,50.1,Shorea astylosa,
6575,38C1,4,292.0,,80.8,Shorea polysperma,
6576,4B1,4,205.0,,81.7,Macaranga bicolor,
6577,4A1,4,289.0,,84.7,Shorea palosapis,


In [138]:
# Check non-null counts of the name columns against their source code columns.
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   unique_id        6579 non-null   object 
 1   nest             6579 non-null   int64  
 2   code_species     4993 non-null   float64
 3   code_family      1330 non-null   float64
 4   DBH              6579 non-null   float64
 5   scientific_name  4993 non-null   object 
 6   family_name      1330 non-null   object 
dtypes: float64(3), int64(1), object(3)
memory usage: 359.9+ KB


In [139]:
# Write trees (including scientific_name / family_name) to CSV; this path is
# passed as an argument to the R script in the following cell.
trees.to_csv(TREES_SPECIES_CSV, index=False)

## Get genus and wood density using BIOMASS R library

Wood density was generated using the [BIOMASS](https://www.rdocumentation.org/packages/BIOMASS/versions/2.1.11) R package. For further information, see the linked package documentation.

In [140]:
# Run get_wood_density.R with the trees CSV and the wood-density CSV paths as
# arguments (presumably input then output - confirm in the R script).
# NOTE(review): SRC_DIR is neither imported nor assigned in any visible cell, and
# the recorded output shows the script path resolving to '/get_wood_density.R',
# i.e. the variable expanded to an empty string. Define SRC_DIR before this cell.
!Rscript $SRC_DIR"/get_wood_density.R" $TREES_SPECIES_CSV $TREES_WD_CSV

Fatal error: cannot open file '/get_wood_density.R': No such file or directory


In [141]:
# Replace the in-memory trees frame with the contents of the wood-density CSV.
# NOTE(review): in the recorded run the R step failed but this read succeeded,
# and the previewed unique_id values differ from those shown before the export
# (308D2 vs 308D1), so the file was presumably left over from an earlier run -
# verify it is regenerated from the current TREES_SPECIES_CSV.
trees = pd.read_csv(TREES_WD_CSV)

In [142]:
# Preview the reloaded frame, which now carries corrected_genus and wood_density.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density
0,308D2,2,,25.0,10.8,,Fabaceae,,0.702417
1,308D2,2,,25.0,17.3,,Fabaceae,,0.702417


## Estimate tree height

In [143]:
# Add a `height` column estimated from the DBH column
# (implementation in src.biomass_equations.calculate_tree_height).
trees = calculate_tree_height(trees, "DBH")

In [144]:
# Preview the frame with the new height column.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height
0,308D2,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612
1,308D2,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661


## Add strata to trees


In [145]:
# Attach each tree's Strata via its plot id. The left join keeps every tree
# row, including any whose plot id has no match in the lookup.
strata_lookup = plot_strata[["unique_id", "Strata"]]
trees = pd.merge(trees, strata_lookup, how="left", on="unique_id")

In [146]:
# Preview the frame with the new Strata column.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata
0,308D2,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2
1,308D2,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2


## Calculate biomass and carbon stock for tree AGB 

In [147]:
# Trees in strata 1-3 are the ones passed to the tropical-tree equation.
in_tropical_strata = trees["Strata"].isin([1, 2, 3])
tropical_trees = trees[in_tropical_strata].copy()

In [148]:
# Apply the tropical-tree allometric equation using the wood density, DBH and
# height columns (implementation in src.biomass_equations).
tropical_trees = allometric_tropical_tree(
    tropical_trees, "wood_density", "DBH", "height"
)

In [149]:
# Trees in strata 4-6 are the ones passed to the peatland-tree equation.
in_peatland_strata = trees["Strata"].isin([4, 5, 6])
peatland_trees = trees[in_peatland_strata].copy()

In [150]:
# Apply the peatland-tree allometric equation, which takes only the DBH column
# (implementation in src.biomass_equations).
peatland_trees = allometric_peatland_tree(peatland_trees, "DBH")

In [151]:
# Recombine the two subsets into one frame. Rows whose Strata is outside 1-6
# (or missing) belong to neither subset and are therefore not carried forward.
# The original row labels are kept (no index reset).
trees = pd.concat([tropical_trees, peatland_trees])

In [152]:
# Derive aboveground carbon from aboveground biomass. In the recorded output
# aboveground_carbon_tonnes equals aboveground_biomass * 0.47, so 0.47 acts as
# the carbon fraction (implementation in src.biomass_equations.vmd0001_eq1).
trees = vmd0001_eq1(trees, 0.47)

In [153]:
# Preview the aboveground biomass and carbon columns.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata,aboveground_biomass,aboveground_carbon_tonnes
0,308D2,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2,608.932762,286.198398
1,308D2,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2,1972.847878,927.238503


## Calculate below ground biomass

In [154]:
# Add the below_ground_carbon_tonnes column
# (implementation in src.biomass_equations.vmd0001_eq5, called with its defaults).
trees = vmd0001_eq5(
    trees,
)

## Export data and Upload to BQ

In [155]:
# Final schema check before export: row count, dtypes and non-null counts.
trees.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6579 entries, 0 to 2816
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   unique_id                   6579 non-null   object 
 1   nest                        6579 non-null   int64  
 2   code_species                4993 non-null   float64
 3   code_family                 1330 non-null   float64
 4   DBH                         6579 non-null   float64
 5   scientific_name             4993 non-null   object 
 6   family_name                 1940 non-null   object 
 7   corrected_genus             4993 non-null   object 
 8   wood_density                6579 non-null   float64
 9   height                      6579 non-null   float64
 10  Strata                      6579 non-null   int64  
 11  aboveground_biomass         6579 non-null   float64
 12  aboveground_carbon_tonnes   6579 non-null   float64
 13  below_ground_carbon_tonnes  6579 non-n

In [156]:
# Preview the final tree-level results before export.
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata,aboveground_biomass,aboveground_carbon_tonnes,below_ground_carbon_tonnes
0,308D2,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2,608.932762,286.198398,68.687616
1,308D2,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2,1972.847878,927.238503,222.537241


In [157]:
# Export to CSV, then upload to BQ
# Refuse to overwrite the outputs with an empty result.
if len(trees) == 0:
    raise ValueError("Dataframe is empty.")

trees_carbon_csv = CARBON_STOCK_OUTDIR / "trees_carbon_tonnes.csv"
trees.to_csv(trees_carbon_csv, index=False)

trees_carbon_table = f"{DATASET_ID}.trees_carbon_tonnes"
pandas_gbq.to_gbq(
    trees,
    trees_carbon_table,
    project_id=GCP_PROJ_ID,
    if_exists=IF_EXISTS,
    progress_bar=True,
)

100%|██████████| 1/1 [00:00<00:00, 9157.87it/s]


# Calculate sapling biomass

In [158]:
# Compute sapling aboveground carbon using the helper's sapling mode; the
# recorded output shows this adds an aboveground_carbon_tonnes column
# (implementation in src.biomass_equations.vmd0001_eq1).
saplings = vmd0001_eq1(saplings, is_sapling=True)

In [159]:
# Calculate corrected radius for sapling nest based on slope (in radians)
# The literal 2 is the uncorrected nest radius (presumably metres - confirm);
# dividing by cos(slope) lengthens it as the slope increases.
corrected_radius = 2 / np.cos(plot_info["slope_radians"])

In [160]:
# Calculate new total subplot area based on corrected radius
# NOTE(review): this multiplies by 2 rather than squaring. That matches an
# ellipse area (pi * a * b) with the corrected radius as one semi-axis and the
# uncorrected radius 2 as the other, and equals pi * 2**2 at zero slope. If a
# circle of the corrected radius was intended, this should be
# `corrected_radius ** 2` - confirm against the sampling protocol.
plot_info["corrected_sapling_area_m2"] = np.pi * corrected_radius * 2

In [161]:
# Bring the slope-corrected nest area onto each sapling record (inner join on plot id).
# NOTE(review): the recorded outputs show 674 sapling rows at load time and 732
# rows after this point, which suggests repeated unique_id values on at least
# one side of the join - verify and de-duplicate before relying on totals.
sapling_area = plot_info[["unique_id", "corrected_sapling_area_m2"]]
saplings = pd.merge(saplings, sapling_area, how="inner", on="unique_id")

In [162]:
# Apply vmd0001_eq2; the recorded output shows this adds a CO2e_per_ha column
# (implementation in src.biomass_equations).
saplings = vmd0001_eq2(saplings)

In [163]:
# Final schema check before export: row count, dtypes and non-null counts.
saplings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 732 entries, 0 to 731
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   unique_id                  732 non-null    object 
 1   count_saplings             623 non-null    float64
 2   litter_bag_weight          656 non-null    float64
 3   litter_sample_weight       656 non-null    float64
 4   ntv_bag_weight             656 non-null    float64
 5   ntv_sample_weight          656 non-null    float64
 6   aboveground_carbon_tonnes  732 non-null    float64
 7   corrected_sapling_area_m2  656 non-null    float64
 8   CO2e_per_ha                656 non-null    float64
dtypes: float64(8), object(1)
memory usage: 51.6+ KB


In [164]:
# Export to CSV, then upload to BQ
# Refuse to overwrite the outputs with an empty result.
if len(saplings) == 0:
    raise ValueError("Dataframe is empty.")

saplings_carbon_csv = CARBON_STOCK_OUTDIR / "saplings_carbon_stock.csv"
saplings.to_csv(saplings_carbon_csv, index=False)

saplings_carbon_table = f"{DATASET_ID}.saplings_carbon_stock"
pandas_gbq.to_gbq(
    saplings,
    saplings_carbon_table,
    project_id=GCP_PROJ_ID,
    if_exists=IF_EXISTS,
    progress_bar=True,
)

100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]
