# Example: EC data


In [None]:
import pandas as pd
import numpy as np
import glob

import sys
sys.path.append("C:/Users/my_user/Documents/Github/ecophys_utils/")
from ecophys_utils import *

In [None]:
# Root folders: every path is built relative to the project directory
project_path = './'
data_path = f'{project_path}../data/'

# Folder handed to the EddyPro loaders as input location
ec_path = f'{data_path}EC/'

# Site description (coordinates, altitude and local timezone)
lon, lat, alt = 10, -20, 100
timezone = 'Africa/Johannesburg'  # Adjust based on the location

In [None]:
# Load the EddyPro 'full_output' and 'biomet' datasets and join them on the
# timestamp. The left join keeps every row of the flux output, whether or not
# a matching biomet record exists.
print('Load data:')
df = load_all_eddypro(ec_path, dataset='full_output')
biomet = load_all_eddypro(ec_path, dataset='biomet')
df = df.merge(biomet, how='left', on='timestamp')

# For each flux, pass the records whose quality flag is 2 or higher to
# flagged_data_removal_ep. The mask is always derived from the matching
# 'qc_<name>' column; in particular h2o_flux is screened by 'qc_h2o_flux',
# not by the magnitude of the flux itself.
print('Clean up data:')
for flux_name in ('Tau', 'H', 'LE', 'co2_flux', 'h2o_flux'):
    bad_quality = df[f'qc_{flux_name}'] >= 2
    df[flux_name] = flagged_data_removal_ep(df, flux_name, bad_quality)

print('Done...')

In [None]:
# Build the day/night column from the timestamps, using the site position and timezone
print('  - Calculating Day/Night')
site_kwargs = dict(lat=lat, lon=lon, tz=timezone)
df['day_night'] = is_day(df['timestamp'], **site_kwargs)

# NEE is derived from the CO2 flux together with the CO2 storage term
co2_flux, co2_storage = df['co2_flux'], df['co2_strg']
df['nee'] = calculate_nee(co2_flux, co2_storage)

print('Done...')

### $u_*$ filtering

From Papale et al. (2006):
- Only night-time data with $u_*$ below the threshold are dismissed.
- For the determination of the $u_*$ threshold, the data set is split into six temperature classes of equal sample size (according to quantiles).
- For each temperature class, the set is split into 20 equally sized $u_*$ classes.
- The threshold is defined as the $u_*$-class where the average night-time flux reaches more than 99% of the average flux at $u_*$-classes that are higher than the current class.
- The threshold is only accepted if for the temperature class, temperature and $u_*$ are not or only weakly correlated ($|r|<0.4$).
- The final threshold is defined as the median of the thresholds of the (up to) six temperature classes.

This procedure is applied to the subsets of four 3-month periods (January–March, April–June, July–September and October–December) to account for seasonal variation of vegetation structure.

For each period, the $u_*$-threshold is reported, but the whole data set is filtered according to the highest threshold found (conservative approach). In cases where no $u_*$-threshold could be found, it is set to $0.4\ \mathrm{m\,s^{-1}}$. A minimum threshold is set to $0.1\ \mathrm{m\,s^{-1}}$ for forest canopies and $0.01\ \mathrm{m\,s^{-1}}$ for short vegetation sites that commonly have lower $u_*$ values (Papale et al. 2006).

In [None]:
# Grouping keys used for the seasonal u* threshold estimation
df['year'] = df['timestamp'].dt.year
df['season'] = create_season_southern_hemisphere(df['timestamp'])

# The record is split at the fire date; each period gets its own u* threshold
fire_date = '2021-03-01'
is_prefire = df['timestamp'] <= fire_date
is_postfire = df['timestamp'] > fire_date
df_prefire = df.loc[is_prefire]
df_postfire = df.loc[is_postfire]

# Thresholds per (year, season) group for each period
thresholds_prefire = create_seasonal_uStar_threshold_list(df_prefire, groupby=['year', 'season'])
thresholds_postfire = create_seasonal_uStar_threshold_list(df_postfire, groupby=['year', 'season'])

# Collapse the seasonal thresholds into one overall threshold per period
overall_kwargs = dict(missing_fraction=0.75, use_mean=True)
threshold_prefire = calculate_overall_uStar_threshold(thresholds_prefire, **overall_kwargs)
threshold_postfire = calculate_overall_uStar_threshold(thresholds_postfire, **overall_kwargs)
print('Pre-fire u* threshold:  ', threshold_prefire)
print('Post-fire u* threshold: ', threshold_postfire)

# Records whose u* is at or below the threshold of the period they belong to
low_ustar = (
    (is_prefire & (df['u*'] <= threshold_prefire))
    | (is_postfire & (df['u*'] <= threshold_postfire))
)

# Create a '<name>_f' copy of every flux and blank it out where u* is too low;
# the unfiltered columns are left untouched
for flux_name in ('nee', 'co2_flux', 'co2_strg', 'h2o_flux', 'H', 'LE'):
    filtered_name = f'{flux_name}_f'
    df[filtered_name] = df[flux_name]
    df.loc[low_ustar, filtered_name] = np.nan

print('Done applying u* filter...')

### Simplest Respiration estimation

In [None]:
# Respiration estimated from night-time NEE; both estimators read the
# day/night column and the u*-filtered NEE.
# Simple interpolation, only valid in tropics
reco_columns = {'dn_col': 'day_night', 'nee_col': 'nee_f'}
df['reco_simple_interp'] = respiration_from_nighttime_simple_interpolated(df, **reco_columns)
df['reco_simple_blocks'] = respiration_from_nighttime_simple_blocks(df, **reco_columns)

# GPP is computed from the filtered NEE and the interpolated respiration estimate
nee_filtered, reco_interp = df['nee_f'], df['reco_simple_interp']
df['gpp'] = calculate_gpp(nee_filtered, reco_interp)