# Setup

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import pickle
import sys
from pathlib import Path

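# Make the local neon-agbd checkout (the directory that contains the `neon_agbd`
# package) importable. NOTE: this is a machine-specific absolute path — adjust it
# for your environment before running.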
sys.path.insert(0, '/Users/yew/code/neon-agbd/')


from neon_agbd.vst.main import compute_site_biomass_full, ALL_SITES

In [2]:
def process_site(
    site_id: str,
    output_dir: str = "./output",
    *,
    dp1_data_dir: str = "../data/DP1.10098",
    agb_data_dir: str = "../data/NEONForestAGB",
    plot_polygons_path: str = "../data/plot_polygons/NEON_TOS_Plot_Polygons.geojson",
) -> dict:
    """
    Process a single NEON site, save the results to disk, and print a summary.

    Runs ``compute_site_biomass_full`` (with gap filling and dead-status
    corrections enabled), pickles the complete result dictionary to
    ``<output_dir>/<site_id>.pkl`` and writes the ``plot_biomass``,
    ``unaccounted_trees`` and ``individual_trees`` tables as CSV files under
    ``<output_dir>/csvs/``.

    Parameters
    ----------
    site_id : str
        Four-character NEON site code (e.g., 'SJER', 'HARV')
    output_dir : str
        Directory to save output files. It is created (including any missing
        parent directories) if it does not exist yet.
    dp1_data_dir : str, keyword-only
        Passed through as ``dp1_data_dir`` to ``compute_site_biomass_full``.
    agb_data_dir : str, keyword-only
        Passed through as ``agb_data_dir`` to ``compute_site_biomass_full``.
    plot_polygons_path : str, keyword-only
        Passed through as ``plot_polygons_path`` to
        ``compute_site_biomass_full``.

    Returns
    -------
    dict
        The dictionary returned by ``compute_site_biomass_full``, unchanged.
        This function reads the keys ``'plot_biomass'``,
        ``'unaccounted_trees'``, ``'individual_trees'``, ``'site_id'`` and
        ``'metadata'`` from it.

    Raises
    ------
    KeyError
        If the workflow output lacks one of the keys listed above (or the
        ``'status'`` column of a non-empty ``unaccounted_trees`` table).
    """
    output_path = Path(output_dir)
    csvs_output_dir = output_path / "csvs"
    # parents=True also creates output_dir itself and any missing ancestors,
    # so nested targets such as "./results/vst/" work on a fresh checkout.
    csvs_output_dir.mkdir(parents=True, exist_ok=True)

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Processing site: {site_id}")
    print(f"{banner}\n")

    # Run the full workflow
    output = compute_site_biomass_full(
        site_id=site_id,
        dp1_data_dir=dp1_data_dir,
        agb_data_dir=agb_data_dir,
        plot_polygons_path=plot_polygons_path,
        apply_gap_filling=True,
        apply_dead_corrections=True,
        verbose=True
    )

    # Save the complete result dictionary as a pickle
    pkl_file = output_path / f"{site_id}.pkl"
    with open(pkl_file, 'wb') as f:
        pickle.dump(output, f)
    print(f"\nPickle file saved: {pkl_file}")

    # Save individual DataFrames as CSVs for easy inspection
    csv_files = {
        'plot_biomass': f"{site_id}_plot_biomass.csv",
        'unaccounted_trees': f"{site_id}_unaccounted_trees.csv",
        'individual_trees': f"{site_id}_individual_trees.csv"
    }

    for key, filename in csv_files.items():
        filepath = csvs_output_dir / filename
        output[key].to_csv(filepath, index=False)
        print(f"CSV saved: {filepath}")

    # Print summary
    metadata = output['metadata']
    print(f"\n{banner}")
    print("Summary:")
    print(banner)
    print(f"  Site: {output['site_id']}")
    print(f"  Number of plots: {metadata['n_plots']}")
    print(f"  Plot-year combinations: {metadata['n_plot_years']}")
    print(f"  Unaccounted trees: {metadata['n_unaccounted_trees']}")
    print(f"  Individual tree records: {metadata['n_individual_tree_records']}")

    # Show a sample of each output; empty tables are skipped
    plot_biomass = output['plot_biomass']
    if not plot_biomass.empty:
        print("\nPlot biomass table columns:")
        print(f"  {list(plot_biomass.columns)}")
        print("\nSample rows:")
        print(plot_biomass.head(3).to_string())

    unaccounted_trees = output['unaccounted_trees']
    if not unaccounted_trees.empty:
        print("\nUnaccounted trees by status:")
        print(unaccounted_trees['status'].value_counts().to_string())

    return output

In [None]:
# NEON site codes to process in one batch run (42 sites).
dpids = [
    'DELA', 'LENO', 'TALL', 'BONA', 'DEJU', 'HEAL', 'SRER', 'SJER', 'SOAP',
    'TEAK', 'CPER', 'NIWO', 'RMNP', 'DSNY', 'OSBS', 'JERC', 'PUUM', 'KONZ',
    'UKFS', 'SERC', 'HARV', 'UNDE', 'BART', 'JORN', 'DCFS', 'NOGP', 'WOOD',
    'GUAN', 'LAJA', 'GRSM', 'ORNL', 'CLBJ', 'MOAB', 'ONAQ', 'BLAN', 'MLBS',
    'SCBI', 'ABBY', 'WREF', 'STEI', 'TREE', 'YELL',
]

# Run the full workflow for every site in order; the returned dictionaries are
# discarded because process_site already writes the pickle and CSVs to disk.
# There is no per-site error handling, so an exception for one site stops the
# remaining sites from being processed.
for site_id in dpids:
    process_site(site_id, output_dir="./results/")

In [4]:
# Run the workflow for a single site (ABBY) and keep the returned dictionary
# in `test` so its contents can be inspected in the following cells.
test = process_site(site_id='ABBY', output_dir="./results/vst/")


Processing site: ABBY

Processing site: ABBY
  Loading DP1.10098 data...
  Loading NEONForestAGB data...
  Merging AGB estimates with apparent individual data...
  Loading plot area data...
  Found 130 plot-year combinations from vst_perplotperyear
  Categorizing individuals (tree vs small_woody)...
  Applying dead status corrections...
  Identifying unaccounted trees...
  Computing plot-level biomass...
  Calculating growth metrics...
  Creating interpolated time series tables...
  Creating individual tree table...
  Done! Computed biomass for 130 plot-year combinations.
  Found 411 unaccounted trees.
  Created individual tree table with 4258 records.

Pickle file saved: results/vst/ABBY.pkl
CSV saved: results/vst/csvs/ABBY_plot_biomass.csv
CSV saved: results/vst/csvs/ABBY_unaccounted_trees.csv
CSV saved: results/vst/csvs/ABBY_individual_trees.csv

Summary:
  Site: ABBY
  Number of plots: 34
  Plot-year combinations: 130
  Unaccounted trees: 411
  Individual tree records: 4258

Plot 

In [5]:
# List the top-level entries of the result dictionary returned by process_site.
test.keys()

dict_keys(['categoricalCodes_10098', 'citation_10098_RELEASE-2025', 'issueLog_10098', 'readme_10098', 'validation_10098', 'variables_10098', 'vst_apparentindividual', 'vst_mappingandtagging', 'vst_non-woody', 'vst_perplotperyear', 'vst_shrubgroup', 'plot_biomass', 'unaccounted_trees', 'individual_trees', 'plot_jenkins_ts', 'plot_chojnacky_ts', 'plot_annighofer_ts', 'site_id', 'metadata'])