# Plot confidence interval changes

# Imports and Set-up

In [1]:
# Standard Imports
import sys
import os
import pandas as pd
import numpy as np

# Google Cloud Imports
import pandas_gbq

In [3]:
# Util imports
sys.path.append("../../")  # include parent directory
from src.settings import (
    DATA_DIR,
)

## Load data

### Trees data

In [None]:
if TREES_CSV.exists():
    trees = pd.read_csv(TREES_CSV)
else:
    query = f"""
    SELECT 
        * 
    FROM {GCP_PROJ_ID}.{DATASET_ID}.trees"""

    # Read the BigQuery table into a dataframe
    trees = pandas_gbq.read_gbq(query, project_id=GCP_PROJ_ID)
    trees.to_csv(TREES_CSV, index=False)

In [39]:
import pandas as pd
import glob

csv_files = glob.glob(
    "/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/*.csv"
)
dataframes = {}

for file in csv_files:
    file_name = file.split("/")[-1].split(".")[
        0
    ]  # Extract the file name without extension
    df = pd.read_csv(file)
    dataframes[file_name] = df

In [17]:
ci_avg_substitution = dataframes["ci_avg_substitution"]

In [57]:
ci_avg_substitution

Unnamed: 0,Strata,carbon_type,t_CO2_ha,lower_ci,upper_ci,margin_err_perc_90,margin_err_perc_95,standard_error_perc
0,1,AGB,1162.72,986.94,1338.5,0.1512,0.1801,0.0919
1,2,AGB,684.83,589.71,779.95,0.1389,0.1655,0.0844
2,5,AGB,250.96,,,,,
3,1,BGB,278.9,236.71,321.08,0.1513,0.1802,0.092
4,2,BGB,164.16,141.32,186.99,0.1391,0.1657,0.0846
5,5,BGB,60.19,,,,,
6,1,dead_tree_carbon,24.14,19.93,28.36,0.1745,0.2079,0.1061
7,2,dead_tree_carbon,24.29,17.45,31.12,0.2815,0.3354,0.1711
8,5,dead_tree_carbon,12.69,,,,,
9,1,litter_carbon,3.91,3.33,4.48,0.1467,0.1749,0.0892


In [34]:
ci_avg_substitution.drop(columns=["Unnamed: 0"], inplace=True)

In [35]:
ci_avg_substitution.drop(columns=["carbon_type"]).groupby("Strata").mean().reset_index()

Unnamed: 0,Strata,t_CO2_ha,lower_ci,upper_ci,margin_err_perc_90,margin_err_perc_95,standard_error_perc
0,1,294.26,249.658,338.86,0.15504,0.18472,0.09426
1,2,175.786,150.668,200.898,0.17224,0.20524,0.10472
2,5,66.692,,,,,


In [78]:
ci_avg_substitution.sort_values(by="Strata")

Unnamed: 0,Strata,carbon_type,t_CO2_ha,lower_ci,upper_ci,margin_err_perc_90,margin_err_perc_95,standard_error_perc
0,1,AGB,1162.72,986.94,1338.5,0.1512,0.1801,0.0919
3,1,BGB,278.9,236.71,321.08,0.1513,0.1802,0.092
6,1,dead_tree_carbon,24.14,19.93,28.36,0.1745,0.2079,0.1061
9,1,litter_carbon,3.91,3.33,4.48,0.1467,0.1749,0.0892
12,1,ntv_carbon,1.63,1.38,1.88,0.1515,0.1805,0.0921
1,2,AGB,684.83,589.71,779.95,0.1389,0.1655,0.0844
4,2,BGB,164.16,141.32,186.99,0.1391,0.1657,0.0846
7,2,dead_tree_carbon,24.29,17.45,31.12,0.2815,0.3354,0.1711
10,2,litter_carbon,3.79,3.35,4.22,0.1143,0.1362,0.0695
13,2,ntv_carbon,1.86,1.51,2.21,0.1874,0.2234,0.114


# droping trees >150

In [47]:
dataframes.keys()

dict_keys(['ci_avg_substitution', 'ci_capped_150', 'emission_factors_biomass_inventories_remove_trees_150', 'emission_factors_biomass_inventories_avg_substitution', 'ci_remove_trees_150', 'emission_factors_biomass_inventories_capped_150'])

In [52]:
csv_files

['/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/ci_avg_substitution.csv',
 '/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/ci_capped_150.csv',
 '/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/emission_factors_biomass_inventories_remove_trees_150.csv',
 '/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/emission_factors_biomass_inventories_avg_substitution.csv',
 '/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/ci_remove_trees_150.csv',
 '/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/emission_factors_biomass_inventories_capped_150.csv']

In [53]:
ci_remove_trees_150 = pd.read_csv(
    "/Users/renflores/Documents/OneBase/data/csv/test_confidence_intervals/ci_remove_trees_150.csv"
)

In [54]:
ci_remove_trees_150.drop(columns=["Unnamed: 0"], inplace=True)

In [55]:
ci_remove_trees_150.drop(columns=["carbon_type"]).groupby("Strata").mean().reset_index()

Unnamed: 0,Strata,t_CO2_ha,lower_ci,upper_ci,margin_err_perc_90,margin_err_perc_95,standard_error_perc
0,1,292.744,248.306,337.182,0.15512,0.18484,0.09432
1,2,174.104,149.114,199.09,0.17248,0.20556,0.10488
2,5,66.692,,,,,


In [59]:
merged_df = ci_remove_trees_150.merge(
    ci_avg_substitution,
    on=["Strata", "carbon_type"],
    suffixes=("_remove_trees_150", "_avg_substitution"),
)

In [74]:
(
    merged_df["margin_err_perc_95_remove_trees_150"]
    - merged_df["margin_err_perc_95_avg_substitution"]
)

0     0.0003
1     0.0008
2        NaN
3     0.0003
4     0.0008
5        NaN
6     0.0000
7     0.0000
8        NaN
9     0.0000
10    0.0000
11       NaN
12    0.0000
13    0.0000
14       NaN
dtype: float64

In [65]:
ci_capped_150 = dataframes["ci_capped_150"]

In [70]:
merged_df = merged_df.merge(ci_capped_150, on=["Strata", "carbon_type"])

In [76]:
merged_df["margin_err_perc_95"] - merged_df["margin_err_perc_95_avg_substitution"]

0     0.0012
1     0.0022
2        NaN
3     0.0012
4     0.0023
5        NaN
6    -0.0006
7     0.0005
8        NaN
9     0.0003
10   -0.0003
11       NaN
12   -0.0002
13    0.0000
14       NaN
dtype: float64

In [None]:
dataframes["ci_capped_150"]

In [79]:
dataframes.keys()

dict_keys(['ci_avg_substitution', 'ci_capped_150', 'emission_factors_biomass_inventories_remove_trees_150', 'emission_factors_biomass_inventories_avg_substitution', 'ci_remove_trees_150', 'emission_factors_biomass_inventories_capped_150'])

In [80]:
ef_avg_substitution = dataframes[
    "emission_factors_biomass_inventories_avg_substitution"
]

In [81]:
ef_avg_substitution.drop(columns=["Unnamed: 0"], inplace=True)

In [83]:
ef_avg_substitution.columns

Index(['unique_ID', 'plot', 'lc_type', 'plot_type', 'latitude', 'longitude',
       'corrected_plot_area_m2', 'AGB_biomass', 'BGB_biomass', 'litter_carbon',
       'ntv_carbon', 'stump_biomass', 'ldw_biomass', 'dead_tree_biomass',
       'Strata', 'LCC', 'Bgy_Name', 'Mun_Name', 'Pro_Name', 'start', 'Team',
       'total_dead_wood', 'n'],
      dtype='object')