# Imports and Set-up

In [1]:
# Standard Imports
import sys
import os
import pandas as pd
import numpy as np

# Google Cloud Imports
import pandas_gbq

In [2]:
# Util imports
sys.path.append("../../")  # include parent directory
from src.settings import (
    DATA_DIR,
    GCP_PROJ_ID,
    CARBON_POOLS_OUTDIR,
    CARBON_STOCK_OUTDIR,
    SPECIES_LOOKUP_CSV,
)

# from src.biomass_equations import vmd0003_eq1

In [3]:
# Variables
TREES_CSV = CARBON_POOLS_OUTDIR / "trees.csv"
SAPLING_CSV = CARBON_POOLS_OUTDIR / "saplings_ntv_litter.csv"
PLOT_INFO_CSV = CARBON_POOLS_OUTDIR / "plot_info.csv"

# BigQuery Variables
DATASET_ID = "carbon_stock"
IF_EXISTS = "replace"

## Load data

### Trees data

In [4]:
if TREES_CSV.exists():
    trees = pd.read_csv(TREES_CSV)
else:
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{DATASET_ID}.trees"""

    # Read the BigQuery table into a dataframe
    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)

In [5]:
trees.rename(
    columns={"species_name": "code_species", "family_name": "code_family"}, inplace=True
)

In [6]:
trees.loc[trees["code_species"] == 999, "code_species"] = np.nan

In [7]:
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6579 entries, 0 to 6578
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   unique_id     6579 non-null   object 
 1   nest          6579 non-null   int64  
 2   code_species  4993 non-null   float64
 3   code_family   1330 non-null   float64
 4   DBH           6579 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 257.1+ KB


In [8]:
trees.head(2)

Unnamed: 0,unique_id,nest,code_species,code_family,DBH
0,308D2,2,,25.0,10.8
1,308D2,2,,25.0,17.3


### Tree species

In [24]:
species = pd.read_csv(SPECIES_LOOKUP_CSV)

In [25]:
species.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375 entries, 0 to 374
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   code_family      74 non-null     float64
 1   family           74 non-null     object 
 2   scientific_name  375 non-null    object 
 3   local_name       375 non-null    object 
 4   code_species     375 non-null    int64  
 5   corrected_genus  375 non-null    object 
 6   wood_density     375 non-null    float64
dtypes: float64(2), int64(1), object(4)
memory usage: 20.6+ KB


In [11]:
species.head(2)

Unnamed: 0,code_family,family,scientific_name,local_name,code_species,corrected_genus,wood_density
0,999.0,Unknown,Litchi chinensis,Alupag - amo,193,Litchi,0.608902
1,1.0,Alangiaceae,Alangium javanicum,Putian,15,Alangium,0.608902


## Add species using lookup table

Wood density in this table was generated using [BIOMASS](https://www.rdocumentation.org/packages/BIOMASS/versions/2.1.11) library from R

In [12]:
merged_df = trees.merge(species, on="code_family", how="left")

In [13]:
# add family name based on lookup file
trees["family_name"] = merged_df["family"]

In [14]:
merged_df = trees.merge(species, on="code_species", how="left")

In [15]:
# add species name based on lookup file
trees["scientific_name"] = merged_df["scientific_name"]

# Add wood density based on scientific name
trees["wood_density"] = merged_df["wood_density"]

In [16]:
trees.fillna({"scientific_name": "Unknown", "family_name": "Unknown"}, inplace=True)

In [17]:
trees[(trees["code_species"].isna()) & (trees["code_family"])]

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,family_name,scientific_name,wood_density
0,308D2,2,,25.0,10.8,Fabaceae,Unknown,
1,308D2,2,,25.0,17.3,Fabaceae,Unknown,
2,308D2,2,,25.0,12.8,Fabaceae,Unknown,
3,308D2,2,,25.0,28.1,Fabaceae,Unknown,
8,308B2,2,,25.0,45.3,Unknown,Unknown,
...,...,...,...,...,...,...,...,...
6499,125C2,4,,41.0,122.6,Unknown,Unknown,
6500,125C2,4,,41.0,53.2,Unknown,Unknown,
6503,125A2,4,,33.0,56.8,Unknown,Unknown,
6514,147B2,4,,41.0,52.6,Unknown,Unknown,


In [18]:
unique_family_names = trees.loc[trees["code_species"].isna(), "family_name"].unique()
print(unique_family_names)

['Fabaceae' 'Unknown' 'Dipterocarpaceae' 'Sapotaceae' 'Annonaceae']


In [19]:
trees.loc[
    (trees["code_family"].isna()) & (trees["code_species"].isna()),
]

Unnamed: 0,unique_id,nest,code_species,code_family,DBH,family_name,scientific_name,wood_density
4,308A2,2,,,18.70,Unknown,Unknown,
5,308A2,2,,,20.20,Unknown,Unknown,
6,308A2,2,,,14.70,Unknown,Unknown,
7,308A2,2,,,12.00,Unknown,Unknown,
32,22C2,2,,,20.40,Unknown,Unknown,
...,...,...,...,...,...,...,...,...
6466,399C2,4,,,84.90,Unknown,Unknown,
6470,399A2,4,,,62.00,Unknown,Unknown,
6472,203D2,4,,,69.55,Unknown,Unknown,
6530,80B2,4,,,58.30,Unknown,Unknown,


In [20]:
species[species["family"] == "Sapotaceae"]

Unnamed: 0,code_family,family,scientific_name,local_name,code_species,corrected_genus,wood_density
62,62.0,Sapotaceae,Glebionis coronaria,Tungkao,152,Glebionis,0.714869


In [21]:
test = pd.read_csv("/Users/renflores/Documents/OneBase/data/csv/trees_processed_r.csv")

In [22]:
test[(test["species_name"] == "Unknown") & (test["family_name"] == "Unknown")]

Unnamed: 0,unique_ID,nest,team,species_name,family_name,DBH,slope,latitude,longitude,lc_type,lc_class,genusCorr,WD,H,Strata,biomass,root_biomass
12,101A2,2,3,Unknown,Unknown,11.1,39,8.679330,126.050833,f_plant,MX,Unknown,0.626893,13.253394,1,274.179798,65.803151
13,101B2,3,3,Unknown,Unknown,30.0,8,8.679171,126.051785,f_plant,MX,Unknown,0.626893,22.779659,1,3239.518939,777.484545
14,101C2,2,3,Unknown,Unknown,19.1,19,8.678523,126.048674,f_nat,MCB,Unknown,0.626893,17.927968,1,1062.177730,254.922655
17,101C2,2,3,Unknown,Unknown,29.0,19,8.678523,126.048674,f_nat,MCB,Unknown,0.626893,22.395658,1,2982.184688,715.724325
18,101C2,3,3,Unknown,Unknown,62.3,19,8.678523,126.048674,f_nat,MCB,Unknown,0.626893,30.000000,1,17647.563609,4235.415266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6311,86B2,3,2,Unknown,Unknown,30.9,18,9.081891,126.031082,f_nat,MCB,Unknown,0.597575,23.115867,1,3322.366626,797.367990
6359,97B2,3,4,Unknown,Unknown,60.3,48,8.791470,125.993984,f_nat,MDF,Unknown,0.651367,30.000000,1,17189.233220,4125.415973
6442,98C2,3,1,Unknown,Unknown,40.9,42,,,f_nat,MCB,Unknown,0.626893,26.316477,1,6829.673266,1639.121584
6457,98D2,4,1,Unknown,Unknown,57.5,32,,,f_nat,MCB,Unknown,0.626893,29.951426,1,15067.071829,3616.097239


In [23]:
unknown_wood_density = test[
    (test["species_name"] == "Unknown") & (test["family_name"] == "Unknown")
]["WD"].unique()
print(unknown_wood_density)

[0.62689306 0.63654755 0.58324631 0.56415561 0.65374774 0.71276048
 0.61345774 0.76557815 0.65994282 0.6296433  0.70241661 0.68340839
 0.6662754  0.57938183 0.67751888 0.64848263 0.64371242 0.37244234
 0.63275243 0.61081898 0.44770306 0.5188693  0.48636364 0.70101373
 0.66824248 0.45499492 0.60090105 0.6562678  0.64183324 0.61621167
 0.56569511 0.60613125 0.56534708 0.66997691 0.65125871 0.61459908
 0.6429952  0.59757549 0.65136686 0.62810107]
