### Create data files
#### extract & rename variables used in this analysis from raw Ameriflux and NEON data
#### Using AMeriFlux variable naming convention (and units)
#### LH June 2024

In [13]:
import sys
import os, glob
import csv 
import calendar
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
from datetime import datetime, timedelta
import pandas as pd
import xarray as xr

### NEON

In [10]:
varlist = ['LE_F_MDS','H_F_MDS','TA_F','VPD_F','SW_IN_F','NEE_VUT_50','GPP_NT_VUT_50','RECO_NT_VUT_50']
units = ['W m-2','W m-2','C','hPa','W m-2','umol m-2 s-1','umol m-2 s-1','umol m-2 s-1']

In [6]:
### ABBY
in_dir = '../data_raw/'
filename=os.path.join(in_dir+'AMF_US-xAB_FLUXNET_SUBSET_HH_2019-2024_5-7.csv') 
df = pd.read_csv(filename,index_col=0,parse_dates=True,header=0,skiprows = [i for i in range(1, 3) ])
df[df==-9999]=np.NaN

In [11]:
# write
df_out = df[varlist]
df_out.rename(columns={'TA_F': 'TA','LE_F_MDS':'LH', 'H_F_MDS':'H', 'SW_IN_F': 'SW_IN','NEE_VUT_50':'NEE','GPP_NT_VUT_50':'GPP','RECO_NT_VUT_50':'RECO'}, inplace=True)
df_out.to_csv('../data/NEON-ABBY.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [12]:
### WREF
in_dir = '../data_raw/'
filename=os.path.join(in_dir+'AMF_US-xWR_FLUXNET_SUBSET_HH_2019-2024_3-7.csv') 
df = pd.read_csv(filename,index_col=0,parse_dates=True,header=0,skiprows = [i for i in range(1, 3) ])
df[df==-9999]=np.NaN

# write
df_out = df[varlist]
df_out.rename(columns={'TA_F': 'TA','LE_F_MDS':'LH', 'H_F_MDS':'H', 'SW_IN_F': 'SW_IN','NEE_VUT_50':'NEE','GPP_NT_VUT_50':'GPP','RECO_NT_VUT_50':'RECO'}, inplace=True)
df_out.to_csv('../data/NEON-WREF.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


### CA-Ca3

In [58]:
varlist = ['LH','H','TA','VPD','SW_IN','NEE','GPP','RECO']

In [59]:
# US-Ca3
in_dir = '../data_raw/'
filename = os.path.join(in_dir+'AMF_CA-Ca3_BASE_HH_6-5.csv')
df = pd.read_csv(filename,header=2,index_col=0, parse_dates=True, squeeze=True)
df[df==-9999]=np.NaN

# SW
st = pd.datetime(2011,1,1,0,0,0)
df['SW_IN'] = df['SW_IN_PI_F_1']
df['SW_IN'][st:] = df['SW_IN_1_2_1'][st:]

# GPP
df['GPP'] = df['GPP_PI_F']
df['GPP'][st:] = df['GPP_PI_F_1'][st:]
df['GPP'][df['GPP']<0] = np.NaN

# RECO
df['RECO'] = df['RECO_PI_F']
df['RECO'][st:] = df['RECO_PI_F_1'][st:]

# TA
st = pd.datetime(2017,1,1,0,0,0)
df['TA'] = df['TA_1_3_1']
df['TA'][st:] = df['TA_1_6_2'][st:]

df['VPD'] = df.VPD_PI


# ADD gap filled NEE & LE
in_dir = '../data_raw/'
filename = os.path.join(in_dir+'CA3_REddyPro.txt')
df_gf = pd.read_csv(filename,sep=r"\s+",header=0,skiprows=[1], parse_dates=True, squeeze=True)
df_gf[df_gf==-9999]=np.NaN
ts = (
    pd.to_datetime(df_gf['Year'].astype(int), format='%Y') +
    pd.to_timedelta(df_gf['DoY'] - 1, unit='D') +
    pd.to_timedelta(df_gf['Hour'], unit='h')
)

df_gf = df_gf.set_index(ts)


df['NEE'] = df_gf['NEE_gf']
df['NEE'][df['NEE']>100] = np.NaN

df['LH'] = df_gf.LE_gf


  st = pd.datetime(2011,1,1,0,0,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['SW_IN'][st:] = df['SW_IN_1_2_1'][st:]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['GPP'][st:] = df['GPP_PI_F_1'][st:]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['GPP'][df['GPP']<0] = np.NaN
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['RECO'][st:] = df

In [60]:
df_out = df[varlist]
df_out.to_csv('../data/CA-Ca3.csv')

### US-Me6

In [14]:
varlist = ['LH','H','TA','RH','SW_IN','NEE','GPP','RECO']

In [73]:
# US-Me6
in_dir = '../data_raw/'
filename = os.path.join(in_dir+'AMF_US-Me6_BASE_HH_16-5.csv')
df = pd.read_csv(filename,header=2,index_col=0, parse_dates=True, squeeze=True)
df[df==-9999]=np.NaN

In [74]:
filename = os.path.join(in_dir+'USMe6_2020_fluxmet_Linnia.csv')
df_me6_2020 = pd.read_csv(filename,header=2,index_col=None, parse_dates=False, squeeze=True)
df_me6_2020.index = pd.date_range(pd.datetime(2019,12,31,0,0,0),pd.datetime(2020,12,30,23,30,0),freq='30min')

  df_me6_2020.index = pd.date_range(pd.datetime(2019,12,31,0,0,0),pd.datetime(2020,12,30,23,30,0),freq='30min')


In [75]:
filename = os.path.join(in_dir+'USMe6_2021_fluxmet_Linnia.csv')
df_me6_2021 = pd.read_csv(filename,header=2,index_col=None, parse_dates=False, squeeze=True)
df_me6_2021.index = pd.date_range(pd.datetime(2021,1,1,0,0,0),pd.datetime(2021,12,31,23,30,0),freq='30min')

  df_me6_2021.index = pd.date_range(pd.datetime(2021,1,1,0,0,0),pd.datetime(2021,12,31,23,30,0),freq='30min')


In [76]:

# GPP
df['GPP'] = df['GPP_PI']
st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
df['GPP'][st:en] = -df_me6_2020['GEP_umolm-2s-1'][st:en]
st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
df['GPP'][st:en] = -df_me6_2021['GEP_umolm-2s-1'][st:en]

# RECO
df['RECO'] = df['RECO_PI']
st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
df['RECO'][st:en] = df_me6_2020['Re_umolm-2s-1'][st:en]
st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
df['RECO'][st:en] = df_me6_2021['Re_umolm-2s-1'][st:en]

# H
st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
df['H'][st:en] = df_me6_2020['Hmeas_Wm-2'][st:en]
st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
df['H'][st:en] = df_me6_2021['Hmeas_Wm-2'][st:en]

# TA
df['TA'] = df['TA_1_1_2']
st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
df['TA'][st:en] = df_me6_2020['Tair_towertop_degC'][st:en]
st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
df['TA'][st:en] = df_me6_2021['Tair_towertop_degC'][st:en]

# RH
st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
df['RH'][st:en] = df_me6_2020['RH_towertop_%'][st:en]
st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
df['RH'][st:en] = df_me6_2021['RH_towertop_%'][st:en]

  st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['GPP'][st:en] = -df_me6_2020['GEP_umolm-2s-1'][st:en]
  st = pd.datetime(2021,1,1,0,0,0); en = pd.datetime(2021,12,31,23,30,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['GPP'][st:en] = -df_me6_2021['GEP_umolm-2s-1'][st:en]
  st = pd.datetime(2020,1,1,0,0,0); en = pd.datetime(2020,12,30,23,30,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['RECO'][st:en] = df_me6_2020['Re_umolm-2

In [15]:
in_dir = '../data_raw/'
filename = os.path.join(in_dir+'ME6_REddyPro.txt')
df_gf = pd.read_csv(filename,sep=r"\s+",header=0,skiprows=[1], parse_dates=False, squeeze=True)
df_gf[df_gf==-9999]=np.NaN
ts = (
    pd.to_datetime(df_gf['Year'].astype(int), format='%Y') +
    pd.to_timedelta(df_gf['DoY'] - 1, unit='D') +
    pd.to_timedelta(df_gf['Hour'], unit='h')
)

df_gf = df_gf.set_index(ts)

In [81]:
df['NEE'] = df_gf['NEE_gf']
df['LH'] = df_gf['LE_gf']

In [82]:
df_out = df[varlist]
df_out.to_csv('../data/US-Me6.csv')