# Calculate emission factors and confidence intervals

# Imports and Set-up

In [72]:
# Standard Imports
import sys
import pandas as pd

# Google Cloud Imports
import pandas_gbq

In [73]:
# Util imports
# `src.settings` is a project-local module; the directory two levels up is put on
# sys.path so it can be imported (presumably the repository root — confirm).
# The path is relative to the current working directory, not to this notebook file.
sys.path.append("../../")  # include the directory two levels up
from src.settings import GCP_PROJ_ID, CARBON_STOCK_OUTDIR, CARBON_POOLS_OUTDIR

In [74]:
# Configuration

# Local CSV caches: the load cells read these files when they exist and
# otherwise create them from the corresponding BigQuery table.
TREES_CSV = CARBON_STOCK_OUTDIR / "trees_carbon_stock.csv"
DEADWOOD_CSV = CARBON_STOCK_OUTDIR / "deadwood_carbon_stock.csv"
NTV_CSV = CARBON_STOCK_OUTDIR / "ntv_carbon_stock.csv"
LITTER_CSV = CARBON_STOCK_OUTDIR / "litter_carbon_stock.csv"
PLOT_INFO_CSV = CARBON_POOLS_OUTDIR / "plot_info.csv"

# BigQuery settings
SRC_DATASET_ID = "biomass_inventory"  # dataset queried by the load cells
DATASET_ID = "carbon_stock"  # presumably the output dataset — not used in the load cells
IF_EXISTS = "replace"  # presumably the `if_exists` mode for uploads — confirm

## Load data

### Plot Data

In [75]:
# Plot metadata: query BigQuery only when no local CSV cache exists yet, and
# write the result to the cache path so later runs read the file instead.
if not PLOT_INFO_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.plot_info"""

    # Pull the whole table into a dataframe, then cache it locally
    plot_info = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    plot_info.to_csv(PLOT_INFO_CSV, index=False)
else:
    plot_info = pd.read_csv(PLOT_INFO_CSV)

In [76]:
# Summarise columns, dtypes and non-null counts of the plot metadata
plot_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 31 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   unique_id                  671 non-null    object 
 1   data_recorder              671 non-null    object 
 2   team_no                    671 non-null    int64  
 3   plot_code_nmbr             671 non-null    int64  
 4   plot_type                  671 non-null    object 
 5   sub_plot                   671 non-null    object 
 6   yes_no                     671 non-null    object 
 7   sub_plot_shift             633 non-null    object 
 8   GPS_waypt                  633 non-null    float64
 9   GPS_id                     633 non-null    float64
 10  GPS                        576 non-null    object 
 11  GPS_latitude               576 non-null    float64
 12  GPS_longitude              576 non-null    float64
 13  GPS_altitude               576 non-null    float64

### Trees

In [77]:
# Tree carbon stock: query BigQuery only when no local CSV cache exists yet,
# and write the result to the cache path so later runs read the file instead.
# NOTE(review): this reads `trees_carbon_stock` from SRC_DATASET_ID, while
# DATASET_ID ("carbon_stock") is not used in the load cells — confirm which
# dataset actually holds this table.
if not TREES_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.trees_carbon_stock"""

    # Pull the whole table into a dataframe, then cache it locally
    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)
else:
    trees = pd.read_csv(TREES_CSV)

In [78]:
# Summarise columns, dtypes and non-null counts of the tree carbon stock table
trees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615 entries, 0 to 614
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   unique_id                615 non-null    object 
 1   aboveground_CO2e_per_ha  615 non-null    float64
 2   aboveground_tC_per_ha    615 non-null    float64
 3   belowground_CO2e_per_ha  615 non-null    float64
 4   belowground_tC_per_ha    615 non-null    float64
dtypes: float64(4), object(1)
memory usage: 24.2+ KB


### Deadwood

In [79]:
# Deadwood carbon stock: query BigQuery only when no local CSV cache exists yet,
# and write the result to the cache path so later runs read the file instead.
# NOTE(review): this reads `deadwood_carbon_stock` from SRC_DATASET_ID, while
# DATASET_ID ("carbon_stock") is not used in the load cells — confirm which
# dataset actually holds this table.
if not DEADWOOD_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.deadwood_carbon_stock"""

    # Pull the whole table into a dataframe, then cache it locally
    deadwood = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    deadwood.to_csv(DEADWOOD_CSV, index=False)
else:
    deadwood = pd.read_csv(DEADWOOD_CSV)

In [80]:
# Summarise columns, dtypes and non-null counts of the deadwood carbon stock table
deadwood.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 570 entries, 0 to 569
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   unique_id                    570 non-null    object 
 1   stumps_tonnes_dry_matter_ha  440 non-null    float64
 2   ldw_tonnes_dry_matter_ha     476 non-null    float64
 3   sdw_tonnes_dry_matter_ha     191 non-null    float64
 4   all_tonnes_dry_matter_ha     570 non-null    float64
 5   deadwood_tC_per_ha           570 non-null    float64
 6   deadwood_CO2e_per_ha         570 non-null    float64
dtypes: float64(6), object(1)
memory usage: 31.3+ KB


### Litter

In [81]:
# Litter carbon stock: query BigQuery only when no local CSV cache exists yet,
# and write the result to the cache path so later runs read the file instead.
# NOTE(review): this reads `litter_carbon_stock` from SRC_DATASET_ID, while
# DATASET_ID ("carbon_stock") is not used in the load cells — confirm which
# dataset actually holds this table.
if not LITTER_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.litter_carbon_stock"""

    # Pull the whole table into a dataframe, then cache it locally
    litter = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    litter.to_csv(LITTER_CSV, index=False)
else:
    litter = pd.read_csv(LITTER_CSV)

In [82]:
# Summarise columns, dtypes and non-null counts of the litter carbon stock table
litter.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   unique_id             671 non-null    object 
 1   litter_biomass_kg     619 non-null    float64
 2   litter_kg_dry_matter  619 non-null    float64
 3   litter_CO2e_per_ha    619 non-null    float64
dtypes: float64(3), object(1)
memory usage: 21.1+ KB


### Non-tree Vegetation

In [83]:
# Non-tree vegetation carbon stock: query BigQuery only when no local CSV cache
# exists yet, and write the result to the cache path so later runs read the file.
# NOTE(review): this reads `ntv_carbon_stock` from SRC_DATASET_ID, while
# DATASET_ID ("carbon_stock") is not used in the load cells — confirm which
# dataset actually holds this table.
if not NTV_CSV.exists():
    query = f"""
    SELECT
        * 
    FROM {GCP_PROJ_ID}.{SRC_DATASET_ID}.ntv_carbon_stock"""

    # Pull the whole table into a dataframe, then cache it locally
    ntv = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    ntv.to_csv(NTV_CSV, index=False)
else:
    ntv = pd.read_csv(NTV_CSV)

In [84]:
# Summarise columns, dtypes and non-null counts of the non-tree vegetation table
ntv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   unique_id          671 non-null    object 
 1   ntv_biomass_kg     619 non-null    float64
 2   ntv_kg_dry_matter  619 non-null    float64
 3   ntv_CO2e_per_ha    619 non-null    float64
dtypes: float64(3), object(1)
memory usage: 21.1+ KB


# Create subplot-level summary

In [85]:
# Combine the plot metadata with every carbon pool table, keyed on `unique_id`.
# Left joins keep every row of `plot_info`; a plot with no record in a given
# pool gets NaN in that pool's columns.
#
# The left side must be the full `plot_info` frame: `plot_info[[]]` selects zero
# columns, so merging it on "unique_id" raises KeyError on a fresh run.
# NOTE(review): assumes `unique_id` is unique in each table; duplicates would
# silently multiply rows — verify.
merged_df = (
    plot_info
    .merge(trees, on="unique_id", how="left")
    .merge(deadwood, on="unique_id", how="left")
    .merge(ntv, on="unique_id", how="left")
    .merge(litter, on="unique_id", how="left")
)

In [91]:
# Preview the first two rows of the merged table
merged_df.head(2)

Unnamed: 0,unique_id,data_recorder,team_no,plot_code_nmbr,plot_type,sub_plot,yes_no,sub_plot_shift,GPS_waypt,GPS_id,...,sdw_tonnes_dry_matter_ha,all_tonnes_dry_matter_ha,deadwood_tC_per_ha,deadwood_CO2e_per_ha,ntv_biomass_kg,ntv_kg_dry_matter,ntv_CO2e_per_ha,litter_biomass_kg,litter_kg_dry_matter,litter_CO2e_per_ha
0,308D1,Steve,1,308,primary,sub_plotD,yes,no_shift,7.0,1.0,...,,16.323138,7.671875,28.130208,0.8,0.12,8.272,0.8,0.12,6.512
1,308A1,Steve,1,308,primary,sub_plotA,yes,no_shift,8.0,1.0,...,,148.919571,69.992199,256.638062,0.06,0.009,0.6204,0.06,0.009,0.4884


In [92]:
# List every column available after the merges
merged_df.columns

Index(['unique_id', 'data_recorder', 'team_no', 'plot_code_nmbr', 'plot_type',
       'sub_plot', 'yes_no', 'sub_plot_shift', 'GPS_waypt', 'GPS_id', 'GPS',
       'GPS_latitude', 'GPS_longitude', 'GPS_altitude', 'GPS_precision',
       'photo', 'manual_reason', 'lc_type', 'lc_class', 'lc_class_other',
       'disturbance_yesno', 'disturbance_type', 'disturbance_class', 'slope',
       'canopy_avg_height', 'canopy_cover', 'access_reason', 'slope_radians',
       'corrected_plot_area_n2_m2', 'corrected_plot_area_n3_m2',
       'corrected_plot_area_n4_m2', 'aboveground_CO2e_per_ha',
       'aboveground_tC_per_ha', 'belowground_CO2e_per_ha',
       'belowground_tC_per_ha', 'stumps_tonnes_dry_matter_ha',
       'ldw_tonnes_dry_matter_ha', 'sdw_tonnes_dry_matter_ha',
       'all_tonnes_dry_matter_ha', 'deadwood_tC_per_ha',
       'deadwood_CO2e_per_ha', 'ntv_biomass_kg', 'ntv_kg_dry_matter',
       'ntv_CO2e_per_ha', 'litter_biomass_kg', 'litter_kg_dry_matter',
       'litter_CO2e_per_ha'],
