# Calculate biomass from Living trees and saplings

# Imports and Set-up

In [109]:
# Standard Imports
import sys
import pandas as pd
import numpy as np

# Google Cloud Imports
import pandas_gbq

In [110]:
# Util imports
sys.path.append("../../")  # include parent directory
from src.settings import (
    GCP_PROJ_ID,
    CARBON_POOLS_OUTDIR,
    CARBON_STOCK_OUTDIR,
    TMP_OUT_DIR,
    SPECIES_LOOKUP_CSV,
    PC_PLOT_LOOKUP_CSV,
)

from src.biomass_equations import (
    calculate_tree_height,
    allometric_tropical_tree,
    allometric_peatland_tree,
    vmd0001_eq1,
    vmd0001_eq2a,
    vmd0001_eq2b,
    vmd0001_eq5,
)

In [111]:
# Variables
TREES_CSV = CARBON_POOLS_OUTDIR / "trees.csv"
SAPLING_CSV = CARBON_POOLS_OUTDIR / "saplings_ntv_litter.csv"
PLOT_INFO_CSV = CARBON_POOLS_OUTDIR / "plot_info.csv"
TREES_SPECIES_CSV = TMP_OUT_DIR / "trees_with_names.csv"
TREES_WD_CSV = TMP_OUT_DIR / "trees_with_wood_density.csv"

# BigQuery Variables
SRC_DATASET_ID = "biomass_inventory"
DATASET_ID = "carbon_stock"
IF_EXISTS = "replace"

# Processing Conditions
OUTLIER_REMOVAL = "get_ave"  # Options: "get_ave", "drop_outliers", "eq_150"

## Load data

### Plot Data

In [112]:
if PLOT_INFO_CSV.exists():
    plot_info = pd.read_csv(PLOT_INFO_CSV)
else:
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.plot_info"""

    # Read the BigQuery table into a dataframe
    plot_info = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    plot_info.to_csv(PLOT_INFO_CSV, index=False)

In [113]:
plot_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 31 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   unique_id                  671 non-null    object 
 1   data_recorder              671 non-null    object 
 2   team_no                    671 non-null    int64  
 3   plot_code_nmbr             671 non-null    int64  
 4   plot_type                  671 non-null    object 
 5   sub_plot                   671 non-null    object 
 6   yes_no                     671 non-null    object 
 7   sub_plot_shift             633 non-null    object 
 8   GPS_waypt                  633 non-null    float64
 9   GPS_id                     633 non-null    float64
 10  GPS                        576 non-null    object 
 11  GPS_latitude               576 non-null    float64
 12  GPS_longitude              576 non-null    float64
 13  GPS_altitude               576 non-null    float64

In [114]:
# get the slope adjusted area per nest per subplot and creaste dict for substitution
plot_info_subset = plot_info[
    [
        "unique_id",
        "corrected_plot_area_n2_m2",
        "corrected_plot_area_n3_m2",
        "corrected_plot_area_n4_m2",
    ]
].copy()
plot_info_subset.dropna(inplace=True)
plot_info_subset.drop_duplicates(subset=["unique_id"], inplace=True)
plot_info_subset_dict = plot_info_subset.to_dict(orient="records")

In [115]:
plot_info_subset_dict[1]

{'unique_id': '308A1',
 'corrected_plot_area_n2_m2': 79.86713923588653,
 'corrected_plot_area_n3_m2': 718.8042531229787,
 'corrected_plot_area_n4_m2': 1277.8742277741844}

### Trees data

In [116]:
if TREES_CSV.exists():
    trees = pd.read_csv(TREES_CSV)
else:
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.trees"""

    # Read the BigQuery table into a dataframe
    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)

In [117]:
trees.rename(
    columns={"species_name": "code_species", "family_name": "code_family"}, inplace=True
)

In [118]:
trees.loc[trees["code_species"] == 999, "code_species"] = np.nan

In [119]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   unique_id     6579 non-null   object 
 1   nest          6579 non-null   int64  
 2   code_species  4993 non-null   float64
 3   code_family   1330 non-null   float64
 4   DBH           6579 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 257.1+ KB


In [120]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH
0,308D1,2,,25.0,10.8
1,308D1,2,,25.0,17.3


### Saplings data

In [121]:
if SAPLING_CSV.exists():
    saplings = pd.read_csv(SAPLING_CSV)
else:
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.saplings_ntv_litter"""

    # Read the BigQuery table into a dataframe
    saplings = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    saplings.to_csv(SAPLING_CSV, index=False)

In [122]:
saplings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   unique_id             671 non-null    object 
 1   count_saplings        589 non-null    float64
 2   litter_bag_weight     619 non-null    float64
 3   litter_sample_weight  619 non-null    float64
 4   ntv_bag_weight        619 non-null    float64
 5   ntv_sample_weight     619 non-null    float64
dtypes: float64(5), object(1)
memory usage: 31.6+ KB


### Tree species

In [123]:
species = pd.read_csv(SPECIES_LOOKUP_CSV)

In [124]:
species.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375 entries, 0 to 374
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   code_family      74 non-null     float64
 1   family           74 non-null     object 
 2   scientific_name  375 non-null    object 
 3   local_name       375 non-null    object 
 4   code_species     375 non-null    int64  
 5   corrected_genus  375 non-null    object 
 6   wood_density     375 non-null    float64
dtypes: float64(2), int64(1), object(4)
memory usage: 20.6+ KB


In [125]:
species.head(2)

Unnamed: 0,code_family,family,scientific_name,local_name,code_species,corrected_genus,wood_density
0,999.0,Unknown,Litchi chinensis,Alupag - amo,193,Litchi,0.608902
1,1.0,Alangiaceae,Alangium javanicum,Putian,15,Alangium,0.608902


### Plot lookup

In [126]:
plot_strata = pd.read_csv(PC_PLOT_LOOKUP_CSV)

In [127]:
plot_strata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3508 entries, 0 to 3507
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   unique_id  3508 non-null   object
 1   Strata     3508 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 54.9+ KB


# Calculate tree biomass

## Remove outliers


In [128]:
if OUTLIER_REMOVAL == "get_ave":
    mean_dbh = pd.DataFrame(
        trees.groupby(["unique_id", "nest"])["DBH"].mean()
    ).reset_index()
    trees.loc[trees["DBH"] >= 150, "DBH"] = trees.loc[
        trees["DBH"] >= 150, "unique_id"
    ].map(mean_dbh.set_index(["unique_id", "nest"])["DBH"])

elif OUTLIER_REMOVAL == "drop_outliers":
    trees = trees[trees["DBH"] < 150].copy()

elif OUTLIER_REMOVAL == "eq_150":
    trees.loc[trees["DBH"] >= 150, "DBH"] = 150

## Add species using lookup table

In [129]:
species_dict = (
    species[["scientific_name", "code_species"]]
    .set_index("code_species")
    .to_dict()["scientific_name"]
)

In [130]:
trees["scientific_name"] = trees["code_species"].replace(species_dict)

In [131]:
# create lookup table for family name and code
species_family = species[["code_family", "family"]].drop_duplicates()

In [132]:
family_dict = species_family.set_index("code_family").to_dict()["family"]

In [133]:
trees["family_name"] = trees["code_family"].replace(family_dict)

In [134]:
trees.head()

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name
0,308D1,2,,25.0,10.8,,Fabaceae
1,308D1,2,,25.0,17.3,,Fabaceae
2,308D1,2,,25.0,12.8,,Fabaceae
3,308D1,2,,25.0,28.1,,Fabaceae
4,308A1,2,,,18.7,,


In [135]:
trees[(trees.scientific_name.notnull()) & (trees.code_family.isnull())]

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name
10,308C1,2,39.0,,68.3,Artocarpus blancoi,
30,22B1,2,323.0,,14.1,Terminalia copelandi,
31,22B1,2,323.0,,12.6,Terminalia copelandi,
33,22C1,2,313.0,,26.0,Syzygium brevistylum,
34,22C1,2,313.0,,12.8,Syzygium brevistylum,
...,...,...,...,...,...,...,...
6574,38C1,4,278.0,,50.1,Shorea astylosa,
6575,38C1,4,292.0,,80.8,Shorea polysperma,
6576,4B1,4,205.0,,81.7,Macaranga bicolor,
6577,4A1,4,289.0,,84.7,Shorea palosapis,


In [136]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   unique_id        6579 non-null   object 
 1   nest             6579 non-null   int64  
 2   code_species     4993 non-null   float64
 3   code_family      1330 non-null   float64
 4   DBH              6551 non-null   float64
 5   scientific_name  4993 non-null   object 
 6   family_name      1330 non-null   object 
dtypes: float64(3), int64(1), object(3)
memory usage: 359.9+ KB


In [137]:
trees.to_csv(TREES_SPECIES_CSV, index=False)

## Get genus and wood density using BIOMASS R library

Wood density was generated using [BIOMASS](https://www.rdocumentation.org/packages/BIOMASS/versions/2.1.11) library from R. For further information, 

In [138]:
if not TREES_WD_CSV.exists():
    !Rscript {SRC_DIR}/get_wood_density.R {TREES_SPECIES_CSV} {TREES_WD_CSV}

In [139]:
trees = pd.read_csv(TREES_WD_CSV)

In [140]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417


In [141]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   unique_id        6579 non-null   object 
 1   nest             6579 non-null   int64  
 2   code_species     4993 non-null   float64
 3   code_family      1330 non-null   float64
 4   DBH              6579 non-null   float64
 5   scientific_name  4993 non-null   object 
 6   family_name      1330 non-null   object 
 7   corrected_genus  4993 non-null   object 
 8   wood_density     6579 non-null   float64
dtypes: float64(4), int64(1), object(4)
memory usage: 462.7+ KB


## Estimate tree height

In [142]:
trees = calculate_tree_height(trees, "DBH")

In [143]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661


## Add strata to trees


In [144]:
trees = trees.merge(plot_strata[["unique_id", "Strata"]], on="unique_id", how="left")

In [145]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2


In [146]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   unique_id        6579 non-null   object 
 1   nest             6579 non-null   int64  
 2   code_species     4993 non-null   float64
 3   code_family      1330 non-null   float64
 4   DBH              6579 non-null   float64
 5   scientific_name  4993 non-null   object 
 6   family_name      1330 non-null   object 
 7   corrected_genus  4993 non-null   object 
 8   wood_density     6579 non-null   float64
 9   height           6579 non-null   float64
 10  Strata           6579 non-null   int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 565.5+ KB


## Calculate biomass and carbon stock for tree AGB 

In [147]:
plot_strata.Strata.unique()

array([1, 2, 6, 5, 4])

In [148]:
tropical_trees = trees.loc[trees["Strata"].isin([1, 2, 3])].copy()

In [149]:
tropical_trees = allometric_tropical_tree(
    tropical_trees, "wood_density", "DBH", "height"
)

In [150]:
peatland_trees = trees.loc[trees["Strata"].isin([4, 5, 6])].copy()

In [151]:
peatland_trees = allometric_peatland_tree(peatland_trees, "DBH")

In [152]:
trees = pd.concat([tropical_trees, peatland_trees])

In [153]:
trees.head()

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata,aboveground_biomass
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2,60.893276
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2,197.284788
2,308D1,2,,25.0,12.8,,Fabaceae,,0.702417,14.33943,2,92.969509
3,308D1,2,,25.0,28.1,,Fabaceae,,0.702417,22.040406,2,656.369843
4,308A1,2,,,18.7,,,,0.62721,17.719095,2,214.488489


In [155]:
# convert aboveground biomass to tonnes
trees["aboveground_biomass"] = trees["aboveground_biomass"] / 1000

In [156]:
trees = vmd0001_eq1(trees, 0.47)

In [157]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata,aboveground_biomass,aboveground_carbon_tonnes
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2,0.060893,0.02862
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2,0.197285,0.092724


## Calculate below ground biomass

In [158]:
trees = vmd0001_eq5(
    trees,
)

In [159]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,scientific_name,family_name,corrected_genus,wood_density,height,Strata,aboveground_biomass,aboveground_carbon_tonnes,belowground_carbon_tonnes
0,308D1,2,,25.0,10.8,,Fabaceae,,0.702417,13.05612,2,0.060893,0.02862,0.010303
1,308D1,2,,25.0,17.3,,Fabaceae,,0.702417,16.968661,2,0.197285,0.092724,0.033381


## Calculate biomass sum per plot

### AGB

In [160]:
trees_agg_agb = vmd0001_eq2a(trees, ["unique_id", "nest"], "aboveground_carbon_tonnes")

In [161]:
# add the correct area using the unique_id and nest number
trees_agg_agb["corrected_area_m2"] = trees_agg_agb.apply(
    lambda x: next(
        (
            item["corrected_plot_area_n" + str(x["nest"]) + "_m2"]
            for item in plot_info_subset_dict
            if item["unique_id"] == x["unique_id"]
        ),
        None,
    ),
    axis=1,
)

In [163]:
trees_agg_agb = vmd0001_eq2b(
    trees_agg_agb, "aboveground_carbon_tonnes", "corrected_area_m2"
)

In [164]:
# convert tonnes/sqm to tonnes/ha
trees_agg_agb["CO2e_per_ha"] = trees_agg_agb["CO2e_per_ha"] * 10_000

In [100]:
trees_agg_agb.head()

Unnamed: 0,unique_id,nest,aboveground_carbon_tonnes,corrected_area_m2,CO2e_per_ha
0,100A1,2,1.245827,78.571232,0.058139
1,100A1,3,0.5362,707.14109,0.00278
2,100A1,4,1.337752,1257.139716,0.003902
3,100B1,2,0.144053,78.571232,0.006723
4,100C1,2,1.616018,78.571232,0.075414


In [166]:
# calculate tonnes of Carbon per sqm; convert tonnes/sqm to tonnes/ha
trees_agg_agb["tC_per_ha"] = (
    trees_agg_agb["aboveground_carbon_tonnes"] / trees_agg_agb["corrected_area_m2"]
) * 10_000

In [167]:
trees_agg_agb = (
    trees_agg_agb.groupby("unique_id")[["CO2e_per_ha", "tC_per_ha"]]
    .mean()
    .reset_index()
)

In [168]:
trees_agg_agb.rename(
    columns={
        "CO2e_per_ha": "aboveground_CO2e_per_ha",
        "tC_per_ha": "aboveground_tC_per_ha",
    },
    inplace=True,
)

In [169]:
trees_agg_agb.head()

Unnamed: 0,unique_id,aboveground_CO2e_per_ha,aboveground_tC_per_ha
0,100A1,216.069347,58.928004
1,100B1,67.225049,18.334104
2,100C1,754.143561,205.675517
3,100D1,157.781877,43.031421
4,101A1,11.115811,3.031585


### BGB

In [170]:
trees_agg_bgb = vmd0001_eq2a(trees, ["unique_id", "nest"], "belowground_carbon_tonnes")

In [171]:
# add the correct area using the unique_id and nest number
trees_agg_bgb["corrected_area_m2"] = trees_agg_bgb.apply(
    lambda x: next(
        (
            item["corrected_plot_area_n" + str(x["nest"]) + "_m2"]
            for item in plot_info_subset_dict
            if item["unique_id"] == x["unique_id"]
        ),
        None,
    ),
    axis=1,
)

In [172]:
trees_agg_bgb = vmd0001_eq2b(
    trees_agg_bgb, "belowground_carbon_tonnes", "corrected_area_m2"
)

In [174]:
# convert tonnes/sqm to tonnes/ha
trees_agg_bgb["CO2e_per_ha"] = trees_agg_bgb["CO2e_per_ha"] * 10_000

In [175]:
trees_agg_bgb.head()

Unnamed: 0,unique_id,nest,belowground_carbon_tonnes,corrected_area_m2,CO2e_per_ha
0,100A1,2,0.448498,78.571232,209.299376
1,100A1,3,0.193032,707.14109,10.009092
2,100A1,4,0.481591,1257.139716,14.046427
3,100B1,2,0.051859,78.571232,24.201018
4,100C1,2,0.581766,78.571232,271.491682


In [176]:
# calculate tonnes of Carbon per sqm; convert tonnes/sqm to tonnes/ha
trees_agg_bgb["tC_per_ha"] = (
    trees_agg_bgb["belowground_carbon_tonnes"] / trees_agg_bgb["corrected_area_m2"]
) * 10_000

In [177]:
trees_agg_bgb = (
    trees_agg_bgb.groupby("unique_id")[["CO2e_per_ha", "tC_per_ha"]]
    .mean()
    .reset_index()
)

In [178]:
trees_agg_bgb.rename(
    columns={
        "CO2e_per_ha": "belowground_CO2e_per_ha",
        "tC_per_ha": "belowground_tC_per_ha",
    },
    inplace=True,
)

In [179]:
trees_agg_bgb.head()

Unnamed: 0,unique_id,belowground_CO2e_per_ha,belowground_tC_per_ha
0,100A1,77.784965,21.214081
1,100B1,24.201018,6.600278
2,100C1,271.491682,74.043186
3,100D1,56.801476,15.491312
4,101A1,4.001692,1.091371


In [180]:
trees = trees_agg_agb.merge(trees_agg_bgb, on="unique_id", how="left")

In [181]:
trees.head()

Unnamed: 0,unique_id,aboveground_CO2e_per_ha,aboveground_tC_per_ha,belowground_CO2e_per_ha,belowground_tC_per_ha
0,100A1,216.069347,58.928004,77.784965,21.214081
1,100B1,67.225049,18.334104,24.201018,6.600278
2,100C1,754.143561,205.675517,271.491682,74.043186
3,100D1,157.781877,43.031421,56.801476,15.491312
4,101A1,11.115811,3.031585,4.001692,1.091371


## Export data and Upload to BQ

In [182]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615 entries, 0 to 614
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   unique_id                615 non-null    object 
 1   aboveground_CO2e_per_ha  615 non-null    float64
 2   aboveground_tC_per_ha    615 non-null    float64
 3   belowground_CO2e_per_ha  615 non-null    float64
 4   belowground_tC_per_ha    615 non-null    float64
dtypes: float64(4), object(1)
memory usage: 24.2+ KB


# Calculate sapling biomass

In [183]:
saplings = vmd0001_eq1(saplings, is_sapling=True)

In [184]:
# Calculate corrected radius for sapling nest based on slope (in radians)
corrected_radius = 2 / np.cos(plot_info["slope_radians"])

In [185]:
# Calculate new total subplot area based on corrected radius
plot_info["corrected_sapling_area_m2"] = np.pi * corrected_radius**2

In [186]:
saplings = saplings.merge(
    plot_info[["unique_id", "corrected_sapling_area_m2"]], on="unique_id"
)

In [187]:
saplings = vmd0001_eq2b(saplings)

In [188]:
saplings.head()

Unnamed: 0,unique_id,count_saplings,litter_bag_weight,litter_sample_weight,ntv_bag_weight,ntv_sample_weight,aboveground_carbon_tonnes,corrected_sapling_area_m2,CO2e_per_ha
0,308D1,2.0,70.0,770.0,70.0,870.0,43.24,13.231132,11.98285
1,308A1,,50.0,260.0,50.0,110.0,0.0,12.778742,0.0
2,308B1,7.0,50.0,560.0,50.0,120.0,151.34,16.793698,33.042951
3,308C1,2.0,50.0,769.0,50.0,710.0,43.24,14.889893,10.647939
4,249B1,25.0,60.0,260.0,50.0,150.0,540.5,15.834884,125.156167


In [189]:
saplings["saplings_tC_per_ha"] = (
    saplings["aboveground_carbon_tonnes"] / saplings["corrected_sapling_area_m2"]
)

In [190]:
saplings = saplings[["unique_id", "CO2e_per_ha", "saplings_tC_per_ha"]].copy()

In [191]:
saplings.rename(columns={"CO2e_per_ha": "sapling_CO2e_per_ha"}, inplace=True)

In [192]:
saplings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   unique_id            671 non-null    object 
 1   sapling_CO2e_per_ha  619 non-null    float64
 2   saplings_tC_per_ha   619 non-null    float64
dtypes: float64(2), object(1)
memory usage: 15.9+ KB


# Add saplings to aboveground biomass

In [193]:
trees = trees.merge(saplings, on="unique_id", how="left")

In [194]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615 entries, 0 to 614
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   unique_id                615 non-null    object 
 1   aboveground_CO2e_per_ha  615 non-null    float64
 2   aboveground_tC_per_ha    615 non-null    float64
 3   belowground_CO2e_per_ha  615 non-null    float64
 4   belowground_tC_per_ha    615 non-null    float64
 5   sapling_CO2e_per_ha      615 non-null    float64
 6   saplings_tC_per_ha       615 non-null    float64
dtypes: float64(6), object(1)
memory usage: 33.8+ KB


In [195]:
trees["total_aboveground_CO2e_per_ha"] = (
    trees["aboveground_CO2e_per_ha"] + trees["sapling_CO2e_per_ha"]
)

In [196]:
# Upload to BQ
if len(trees) != 0:
    trees.to_csv(CARBON_STOCK_OUTDIR / "trees_carbon_stock.csv", index=False)
    pandas_gbq.to_gbq(
        trees,
        f"{DATASET_ID}.trees_carbon_stock",
        project_id=GCP_PROJ_ID,
        if_exists=IF_EXISTS,
        progress_bar=True,
    )
else:
    raise ValueError("Dataframe is empty.")

100%|██████████| 1/1 [00:00<00:00, 15592.21it/s]
