## Setup

In [None]:
# Git ref of the chloris-agbd repository to install; it is interpolated into
# the pip "git+https://...@{branch}" URL further down in this cell.
branch = "dev"

import os
import sys
from pathlib import Path
import toml
import subprocess
import warnings
import dask
from datetime import datetime
from dask_gateway import Gateway

# Credentials for the private GitLab repository are read from the TOML file
# ~/.gitlab_token, which must provide the keys TOKEN_NAME and ACCESS_TOKEN.
gitlab_token = toml.load(os.path.join(Path.home(), ".gitlab_token"))

# Authenticated pip URL for the requested branch of chloris-agbd.
agbd_repo_url = f"""git+https://{gitlab_token['TOKEN_NAME']}:{gitlab_token['ACCESS_TOKEN']}@gitlab.com/chloris-geospatial/data-science/chloris-agbd.git@{branch}"""

proc = subprocess.Popen(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--no-dependencies",  # the docker image already has all agbd deps installed
        "--upgrade",  # override the version already in the image (may not be needed)
        "--force-reinstall",  # always reinstall
        "--no-cache-dir",  # disable caching
        agbd_repo_url,
    ],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
# communicate() drains both pipes and waits for pip to exit, so the return
# code is already available afterwards; no extra wait() is needed.
stdout, stderr = proc.communicate()
returncode = proc.returncode
if returncode:
    # Decode pip's stderr so the traceback is readable, and mask the access
    # token in case the URL is echoed back in the error output.
    error_text = stderr.decode(errors="replace")
    access_token = str(gitlab_token["ACCESS_TOKEN"])
    if access_token:
        error_text = error_text.replace(access_token, "***")
    raise Exception("Installation failed!", error_text)
print(f"Installation succeeded {datetime.now()}")

In [13]:
import os
import numpy as np
import pandas as pd
import neonutilities as nu
import geopandas as gpd 

# Lift pandas' display limits so wide frames are shown without column
# truncation or line wrapping.
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

In [10]:
import sys

# Make the local NEON helper module importable. The membership check keeps
# the cell idempotent: re-running it no longer appends duplicate entries to
# sys.path.
neon_code_dir = '/code/chloris/NEON/'
if neon_code_dir not in sys.path:
    sys.path.append(neon_code_dir)

from neon_fns import make_veg_gdf

In [11]:
# Four-letter site codes used in this notebook (42 entries, order preserved).
site_names = [
    'DELA', 'LENO', 'TALL', 'BONA', 'DEJU', 'HEAL', 'SRER',
    'SJER', 'SOAP', 'TEAK', 'CPER', 'NIWO', 'RMNP', 'DSNY',
    'OSBS', 'JERC', 'PUUM', 'KONZ', 'UKFS', 'SERC', 'HARV',
    'UNDE', 'BART', 'JORN', 'DCFS', 'NOGP', 'WOOD', 'GUAN',
    'LAJA', 'GRSM', 'ORNL', 'CLBJ', 'MOAB', 'ONAQ', 'BLAN',
    'MLBS', 'SCBI', 'ABBY', 'WREF', 'STEI', 'TREE', 'YELL',
]

In [14]:
# Inventory the CHM directory: one row per file, with the site code and year
# taken from the first two '_'-separated fields of the file name.
#
# os.listdir() returns entries in arbitrary order, so slicing off "the first
# entry" with [1:] dropped an unpredictable item. Select the data files
# explicitly instead: skip hidden entries (e.g. '.ipynb_checkpoints') and any
# name without at least two '_'-separated fields, then sort so the listing is
# reproducible between runs.
# NOTE(review): presumably the old [1:] was meant to skip one non-data entry;
# confirm that the filter below excludes it.
flist = sorted(
    fi
    for fi in os.listdir('/data/chloris/NEON/CHM/')
    if not fi.startswith('.') and fi.count('_') >= 1
)
site = [fi.split('_')[0] for fi in flist]
year = [fi.split('_')[1] for fi in flist]

df = pd.DataFrame({'site': site, 'year': year, 'fname': flist})
# Number of CHM files available per site.
df.value_counts('site')

site
CLBJ    9
DSNY    8
DELA    8
KONZ    8
JERC    8
OSBS    8
TALL    8
LENO    8
UKFS    8
SJER    7
TEAK    7
DCFS    7
BART    7
MOAB    7
WOOD    7
SOAP    7
HARV    7
SERC    6
SCBI    6
ABBY    6
SRER    6
STEI    6
TREE    6
UNDE    6
RMNP    6
NIWO    6
MLBS    6
HEAL    6
GRSM    6
DEJU    6
BONA    6
BLAN    6
ONAQ    5
JORN    5
CPER    5
WREF    5
YELL    5
ORNL    4
PUUM    3
NOGP    2
LAJA    2
GUAN    2
Name: count, dtype: int64

In [15]:
# Collect every '*_single_bole_trees.parquet' file in the VST directory.
flist = [
    fi
    for fi in os.listdir('/data/chloris/NEON/VST/')
    if fi.endswith('_single_bole_trees.parquet')
]

# Read each file into its own frame, then tally the rows across all of them.
df_list = [
    pd.read_parquet(os.path.join('/data/chloris/NEON/VST/', fname))
    for fname in flist
]
total = sum(frame.shape[0] for frame in df_list)

# Stack everything into a single table with a fresh 0..n-1 index.
df = pd.concat(df_list, ignore_index=True)
print(total)
# Number of distinct individualID values across all files.
len(np.unique(df.individualID))

94624


  df = pd.concat(df_list, ignore_index=True)


28676

In [18]:
# Summarise one site: distinct individualID count, then (rows, columns) of
# the rows whose siteID_MAT matches.
site_name = 'UNDE'
site_rows = df[df.siteID_MAT == site_name]
print(len(np.unique(site_rows.individualID)))
site_rows.shape

1450


(5353, 86)

In [None]:
# In Alabama, we can get 2 sites within 1 MGRS tile:
# TALL has 1136 unique trees that were measured a total of 5160 times.
# DELA has 1228 unique trees that were measured a total of 4024 times.

# Up in Michigan, UNDE has 1450 unique trees and 5353 total measurements.

In [None]:
# Export one site's single-bole-trees parquet as DELA.fgb.
# The output name is fixed to DELA, but os.listdir() order is arbitrary, so
# flist[0] is not guaranteed to be the DELA file. Pick it explicitly and fail
# loudly rather than write another site's data under the DELA name.
# NOTE(review): assumes the VST file names contain the site code — confirm.
export_site = 'DELA'
export_matches = sorted(fi for fi in flist if export_site in fi)
if not export_matches:
    raise FileNotFoundError(
        f"No '{export_site}' file found among the VST parquet files"
    )
fname = os.path.join('/data/chloris/NEON/VST/', export_matches[0])

gpd.read_parquet(fname).to_file('/data/chloris/NEON/DELA.fgb')