## Setup

In [None]:
import os
import rioxarray as rxr
import xarray as xr
import numpy as np
import pandas as pd
#import neonutilities as nu
import geopandas as gpd 
import matplotlib.pyplot as plt
import pickle

# Lift both the column-count cap and the fixed character width used when
# pandas renders a DataFrame.
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

In [None]:
import sys
# Put the project code directory on the import path so the `neon_fns` import
# below can be resolved. The membership check keeps sys.path free of duplicate
# entries when this cell is re-run.
NEON_CODE_DIR = '/code/chloris/NEON/'
if NEON_CODE_DIR not in sys.path:
    sys.path.append(NEON_CODE_DIR)
from neon_fns import make_veg_gdf
from neon_fns import *
# from neon import make_veg_gdf

In [None]:
# Four-letter site codes, kept in their original order.
site_names = """
    DELA LENO TALL BONA DEJU HEAL SRER SJER SOAP
    TEAK CPER NIWO RMNP DSNY OSBS JERC PUUM KONZ
    UKFS SERC HARV UNDE BART JORN DCFS NOGP WOOD
    GUAN LAJA GRSM ORNL CLBJ MOAB ONAQ BLAN MLBS
    SCBI ABBY WREF STEI TREE YELL
""".split()

In [None]:
# Biomass table read straight from S3 (the file name suggests one row per tree
# across all sites — confirm against the data).
bio_parquet_uri = 's3://chloris-data-us-west-2/projects/NEON/All_trees_biomass.parquet'
bio_df = pd.read_parquet(bio_parquet_uri)

In [None]:
# Number of distinct plotID values in the biomass table (a missing plotID, if
# any, counts as one value — same as len(unique())).
bio_df['plotID'].nunique(dropna=False)

In [None]:
# Survey year = first four characters of each date string.
# NOTE(review): assumes `date` holds strings that start with a 4-digit year — confirm.
# The vectorised .str slice propagates missing dates as missing values instead
# of raising on a non-string element.
bio_df['year'] = bio_df['date'].str[:4]

# Number of distinct survey years recorded for each plot.
summary = bio_df.groupby("plotID")["year"].nunique().reset_index(name="n_years")
print(summary)

In [None]:
# How many plots have 1, 2, 3, ... distinct survey years, ordered by that number.
years_per_plot = summary['n_years']
counts = years_per_plot.value_counts().sort_index()
print(counts)

In [None]:
site_name = 'DELA'

# Read the pickled vegetation dictionary for this site.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that come from a trusted location.
with open(f'/data/chloris/NEON/DP1.10098/{site_name}.pkl', 'rb') as f:
    veg_dict = pickle.load(f)

# Make the veg gdf
veg_gdf = make_veg_gdf(veg_dict)

In [None]:
# Preview the frame built in the previous cell. (`vst` is only created further
# down, so referencing it here fails on a fresh kernel.)
veg_gdf.head()

## Read the datasets 

In [None]:
# site = 'UNDE'
site = 'DELA'

# Column names to keep
simple_cols = ['date_AI', 'individualID', 'scientificName', 'taxonID', 'family',
               'growthForm', 'plantStatus', 'plotID_AI', 'pointID', 'stemDiameter',
               'canopyPosition', 'height', 'maxBaseCrownDiameter', 'stemEasting',
               'stemNorthing', 'geometry']

# plantStatus codes accepted as valid.
# NOTE(review): 'Live,  other damage' has a double space after the comma; it is
# kept verbatim — confirm it matches the values stored in `plantStatus`.
valid_statuses = ['Live', 'Live,  other damage',
                  'Live, broken bole', 'Live, disease damaged',
                  'Live, insect damaged', 'Live, physically damaged']

# Keep the unfiltered table under its own name so this cell can be re-run safely.
vst_raw = gpd.read_parquet(f"/data/chloris/NEON/VST/{site}_single_bole_trees.parquet")

# Build `vst` in one chain (column subset -> status filter -> year column) so the
# new column is added to a fresh frame rather than to a filtered slice, which
# can trigger SettingWithCopyWarning on pandas < 3.
vst = (
    vst_raw[simple_cols]
    .loc[lambda df: df['plantStatus'].isin(valid_statuses)]
    # Year is the text before the first '-' in the stringified date.
    .assign(year=lambda df: df['date_AI'].astype(str).str.split('-').str[0].astype('int64'))
)

# Trees recorded in 2022 only, as an independent copy.
vst_2022 = vst[vst['year'] == 2022].copy()


In [None]:
# Biomass table read from S3 under the name used in this section.
# NOTE(review): this is the same S3 object that is loaded into `bio_df` earlier
# in the notebook; consider reusing one load.
biomass_uri = 's3://chloris-data-us-west-2/projects/NEON/All_trees_biomass.parquet'
biomass_df = pd.read_parquet(biomass_uri)

In [None]:
# Display the filtered `vst` table built in the "Read the datasets" cell.
vst

In [None]:
# Display the biomass table loaded from S3 above.
biomass_df

### Some handy plots

In [None]:
# Distribution of tree heights over every survey year present in `vst`.
fig, ax = plt.subplots()
ax.hist(vst['height'], bins=30, edgecolor='black')
ax.set(title=f'Histogram of Tree Heights at {site}',
       xlabel='Height (m)', ylabel='Number of Trees')
plt.show()

# NOTE(review): `chm_years` and `chm` are not assigned in any cell visible above;
# they must exist before this cell runs or it raises NameError on a fresh kernel.
print(f"CHM years available: {chm_years}")

# One bar per survey year. A default 10-bin histogram over integer years puts
# bin edges between years, so bars would not line up with individual years.
trees_per_year = vst['year'].value_counts().sort_index()
fig, ax = plt.subplots()
ax.bar(trees_per_year.index, trees_per_year.to_numpy(), edgecolor='black')
ax.set_xticks(trees_per_year.index)
ax.set(title=f'Number of trees surveyed per year at {site}',
       xlabel='Year', ylabel='Number of Trees')
plt.show()

chm.plot.imshow(figsize=(12, 10), cmap='turbo', vmax=30)

In [None]:
# Show the coordinate reference system of `chm` via the rioxarray `.rio` accessor.
# NOTE(review): `chm` is not assigned in the cells visible here — confirm it is
# loaded before this cell.
chm.rio.crs