# NEON Tables 
## Table 2: NEON site characteristics

##### Author : Negin Sobhani negins@ucar.edu [@negin513](https://github.com/negin513)
#### Last updated: 2022-10-04
_______



In [1]:
import os
import sys
import time

import numpy as np
import pandas as pd
import xarray as xr

from glob import glob
from os.path import join, expanduser

import matplotlib
import matplotlib.pyplot as plt

from scipy import stats
import tqdm
from neon_utils import download_eval_files
import matplotlib.colors as colors
import datetime


In [2]:
# Report the installed xarray version (this notebook was working with 0.20.0).
print('xarray ', xr.__version__, sep='')

xarray 0.20.2


## Dask Cluster
The following cell spins up a Dask cluster; it only works on NCAR machines.
Four workers worked best for the NEON analysis.

In [3]:
from distributed import Client
from ncar_jobqueue import NCARCluster

# Number of dask workers to request; 4 workers worked best for NEON.
N_WORKERS = 4

# NOTE: NCARCluster only works on NCAR machines.
cluster = NCARCluster()
cluster.scale(N_WORKERS)

# Connect a client to the cluster; the bare `client` expression below renders
# the cluster/dashboard summary as the cell output.
client = Client(cluster)
client

  from distributed.utils import tmpfile
Perhaps you already have a cluster running?
Hosting the HTTP server on port 38573 instead
  f"Port {expected} is already in use.\n"


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/38573/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/38573/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.60:32957,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/negins/proxy/38573/status,Total threads: 0
Started: Just now,Total memory: 0 B


-------

In [4]:
# -- Load the site table and extract the 'Site' column as a plain Python list.
neon_sites_pft = pd.read_csv(
    '/glade/scratch/negins/preprocessed_neon_csv_data/neon_sites_dompft.csv'
)
neon_sites = neon_sites_pft.loc[:, 'Site'].tolist()

# -- Container for the names of sites that fail during processing.
failed_sites = list()

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Compute, for every NEON site, the mean of each atmospheric forcing variable
# over all available files from the selected years.
#
# Results:
#   all_stats    -- list of pandas Series (one per successful site) holding the
#                   mean of every variable in `atm_vars`
#   all_sites    -- site names, in the same order as `all_stats`
#   failed_sites -- names of the sites that could not be processed
all_stats = []
all_sites = []
failed_sites = []  # reset here so re-running this cell does not accumulate duplicates

# Loop-invariant configuration.
years = ["2018","2019","2020","2021"]
atm_dir = "/glade/work/negins/neon_scripts/notebooks/atm_files_2/"
atm_vars = ['TBOT','RH','WIND','PRECTmms','FSDS','FLDS']

for neon_site in tqdm.tqdm(neon_sites):
    try:
        atm_path = os.path.join(atm_dir, neon_site)

        # Collect this site's files year by year, sorted within each year.
        atm_files = []
        for year in years:
            atm_files.extend(sorted(glob(join(atm_path, neon_site+"_atm_"+year+"*.nc"))))

        # Use the dataset as a context manager so the underlying files are
        # closed once the values have been pulled into the DataFrame.
        with xr.open_mfdataset(atm_files, decode_times=True, combine='by_coords') as ds_atm:
            df = pd.DataFrame({'time': ds_atm.time})
            for var in atm_vars:
                # Flatten each variable to 1-D so it lines up with `time`.
                df[var] = np.ravel(ds_atm[var])

        # Average only the data variables; the `time` column is excluded
        # explicitly so the result does not depend on how the installed pandas
        # version treats datetime columns in DataFrame.mean().
        this_site = df[atm_vars].mean()
        all_stats.append(this_site)
        all_sites.append(neon_site)

    except Exception as err:
        # Best effort: record the failure and continue with the next site.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print ('THIS SITE FAILED:', neon_site, '-', repr(err))
        failed_sites.append(neon_site)


100%|██████████| 47/47 [01:28<00:00,  1.89s/it]


In [9]:
# Place the per-site Series side by side as columns, then flip the result so
# that each site becomes one row.
stats_by_column = pd.concat(all_stats, axis="columns")
df_all = stats_by_column.T


In [12]:
# Attach the site names as a 'site' column and promote it to the row index.
df_all = df_all.assign(site=all_sites).set_index('site')

In [13]:
# Display the table of per-site means (rows indexed by 'site').
df_all

Unnamed: 0_level_0,TBOT,RH,WIND,PRECTmms,FSDS,FLDS
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BART,280.850267,80.275728,2.271633,3.846977e-05,151.805017,310.145646
HARV,281.692679,80.740584,2.855056,4.454394e-05,158.272411,312.732567
BLAN,286.22468,78.242193,1.567727,4.117981e-05,175.712545,334.600628
SCBI,286.156724,78.245865,3.134835,3.67638e-05,173.076151,329.026877
SERC,287.419465,80.55694,3.265165,3.783099e-05,174.089394,335.984598
DSNY,295.765935,79.598703,2.399117,4.343519e-05,211.825488,389.674054
JERC,293.114918,78.454698,2.855097,4.491291e-05,196.84083,370.927748
OSBS,294.406726,77.944898,2.861323,4.499068e-05,199.049906,377.873535
GUAN,299.492679,76.88441,3.520361,1.770103e-05,262.968333,400.100462
LAJA,298.607555,81.881698,2.419477,3.116756e-05,240.659625,412.00739


In [14]:
# Write the per-site table to CSV in the output directory for the paper plots.
out_dir = "/glade/work/negins/neon_vis_plots/paper_plots_final/"

# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory already exists (no check-then-create race).
os.makedirs(out_dir, exist_ok=True)

out_file = os.path.join(out_dir, 'table2.csv')
print ('saving table in ', out_file)

df_all.to_csv(out_file)

saving table in  /glade/work/negins/neon_vis_plots/paper_plots_final/table2.csv
