# Create output

In [None]:
import pandas as pd
import numpy as np
import glob

In [None]:
# --- File locations ----------------------------------------------------------
# Folder that is prefixed to the input file name when it is read.
project_path = "./"

# Input table.
input_fn = "all_data.csv"

# Output files: the full-resolution table, and the stem of the summarised
# table (the daily summary appends '_daily.csv' to this stem).
output_fn = "data_full.csv"
output_summarised_fn = "all_summarised"

### Stomatal conductance correction factor

In [None]:
def diffusion_coef(molar_mass_g_mol, collision_diam_angstrom, collision_integral, temperature_C=23, pressure_atm=1):
    """Binary diffusion coefficient of a gas in air (Chapman-Enskog, Bird et al. 2007).

    Parameters
    ----------
    molar_mass_g_mol : molar mass of the gas [g mol-1]
    collision_diam_angstrom : collision diameter of the gas [Å]
    collision_integral : collision integral of the air-gas pair
    temperature_C : temperature [°C], default 23
    pressure_atm : pressure [atm], default 1

    Scalars and element-wise containers (NumPy arrays, pandas Series) are
    both accepted, since only arithmetic and ``np.sqrt`` are applied.

    Returns
    -------
    Diffusion coefficient of the gas in air [m2 s-1].
    """
    # Physical constants
    gas_const = 8.31446261815          # Gas const. [J K-1 mol-1]
    avogadro = 6.02214076*10**23       # Avogadro const. [mol-1]

    # Properties of air, the carrier gas
    air_molar_mass_kg = 28.964/1000    # [g mol-1] to [kg mol-1]
    air_diam_angstrom = 3.617          # Collision diameter of air [Å]

    # Gas properties and ambient conditions in SI units
    gas_molar_mass_kg = molar_mass_g_mol/1000    # [g mol-1] to [kg mol-1]
    # Pair collision diameter = arithmetic mean of both diameters, [Å] to [m]
    pair_diam_m = (air_diam_angstrom + collision_diam_angstrom)/2 * 10**(-10)
    pressure_Pa = pressure_atm*101325            # [atm] to [Pa]
    temperature_K = temperature_C + 273.15       # [°C] to [K]

    # Chapman-Enskog equation, split into its numerator and denominator
    thermal_term = np.sqrt(
        2*(gas_const*temperature_K)**3/np.pi
        * (1/air_molar_mass_kg + 1/gas_molar_mass_kg)
    )
    collision_term = avogadro * pressure_Pa * pair_diam_m**2 * collision_integral

    return 3/16 * thermal_term / collision_term

def collision_integral(eK_gas1, eK_gas2, temperature_C=23):
    """Collision integral for a pair of gases at the given temperature.

    The two energy parameters are combined by their geometric mean, the
    temperature in kelvin is divided by that mean, and the resulting reduced
    temperature is inserted into a power-law term plus three exponential
    terms.

    Parameters
    ----------
    eK_gas1, eK_gas2 : energy parameters of the two gases (same unit as the
        temperature in kelvin they are divided into)
    temperature_C : temperature [°C], default 23

    Returns
    -------
    The collision integral; element-wise if an array-like is passed.
    """
    pair_eK = np.sqrt(eK_gas1*eK_gas2)
    reduced_T = (temperature_C + 273.15) / pair_eK

    # Power-law term first, then the exponential terms added in order
    omega = 1.06036/reduced_T**0.15610
    for amplitude, rate in ((0.19300, 0.47635),
                            (1.03587, 1.52996),
                            (1.76474, 3.89411)):
        omega = omega + amplitude/np.exp(rate*reduced_T)
    return omega

# Energy parameters handed to collision_integral() (H2O value: Lin et al. 2004)
eK_air, eK_h2o, eK_co, eK_co2 = 97, 358, 110, 190
# Collision diameters [Å] handed to diffusion_coef() (H2O value: Lin et al. 2004)
sigma_h2o, sigma_co, sigma_co2 = 2.725, 3.590, 3.996
# Molar masses [g mol-1] handed to diffusion_coef()
M_h2o, M_co, M_co2 = 18.015, 28.010, 44.010

# Diffusivities of H2O and CO in air at 23 °C and 1 atm, and their ratio
Omega_air_h2o = collision_integral(eK_air, eK_h2o, temperature_C=23)
Omega_air_co = collision_integral(eK_air, eK_co, temperature_C=23)
D_air_h2o = diffusion_coef(M_h2o, sigma_h2o, Omega_air_h2o, temperature_C=23, pressure_atm=1)
D_air_co = diffusion_coef(M_co, sigma_co, Omega_air_co, temperature_C=23, pressure_atm=1)
R_h2o_co = D_air_h2o/D_air_co

# Create an output table and show it, for Methods S5 (may need to be renumbered)
cond_frac_table = pd.DataFrame({
    'Gas':   ['H2O', 'CO', 'CO2'],
    'M':     [M_h2o, M_co, M_co2],
    'sigma': [sigma_h2o, sigma_co, sigma_co2],
    'eK':    [eK_h2o, eK_co, eK_co2],
})
# Both helpers work element-wise on the table columns (default 23 °C, 1 atm)
cond_frac_table['Omega'] = collision_integral(eK_air, cond_frac_table['eK'])
D_ag_m2_s = diffusion_coef(cond_frac_table['M'],
                           cond_frac_table['sigma'],
                           cond_frac_table['Omega'])
cond_frac_table['D_ag'] = D_ag_m2_s*1e4  # Multiplication with 1e4 converts from [m2 s-1] to [cm2 s-1]
# Ratio of the H2O diffusivity to the diffusivity of each gas
D_ag_h2o = cond_frac_table.loc[cond_frac_table['Gas'] == 'H2O', 'D_ag'].values
cond_frac_table['R_ag'] = D_ag_h2o/cond_frac_table['D_ag']
display(cond_frac_table)

### Process data and save

In [None]:
print('Loading data...')

df = pd.read_csv(project_path + input_fn)

print('  - Preparing full dataset')

# Filter data: keep the study year, records with status 'cc', and drop
# everything flagged as rain or post-rain.
# NOTE(review): read_csv is called without parse_dates, so 'timestamp' is
# still text here and the period bounds are compared as strings. That is only
# correct for ISO-like 'YYYY-MM-DD HH:MM' timestamps - confirm the input format.
in_period = (df['timestamp'] >= '2020-09-01 00:00') & (df['timestamp'] < '2021-09-01 00:00')
is_cc = df['status'] == 'cc'
is_dry = ~df['rain'].isin(['Post-rain', 'Rain'])
df = df.loc[in_period & is_cc & is_dry].copy()

# Rename columns
df.rename(columns={'plot': 'treatment',
                   'flux.co.ch_oc.nmol_m2_s': 'co.flux',
                   'flux.h2o.ch_oc.mmol_m2_s': 'Tr',
                   'flux.co2.ch_oc.umol_m2_s1': 'co2.flux',
                   'conc_ci.co.nmol_mol':'co.ci',
                   'par.current.chamber.umol_m2_s1': 'PAR',
                   'par.ambient.umol_m2_s1': 'PAR_above_canopy',
                   'temp.leaf.current.chamber.c.oc': 'TL',
                   'temp.air.current.chamber.c': 'TA',
                   'VPD.Pa.oc': 'VPD',
                   'swc_10_30cm': 'SWC'},
          inplace=True)
#df.loc[df['treatment'] == 'irr', 'treatment'] = 'Irrigated'
#df.loc[df['treatment'] == 'ctr', 'treatment'] = 'Droughted'

# Mass-based adjustment of conductance for CO.
# R_h2o_co is D_air_h2o / D_air_co, i.e. how much faster H2O diffuses in air
# than CO. Conductance scales with diffusivity, so the conductance to CO is the
# water-vapour conductance DIVIDED by that ratio (same convention as
# g_CO2 = g_H2O / 1.6). Multiplying, as was done before, inflated g_tCO.
df['g_tCO'] = df['g_tw']/R_h2o_co

# Select only relevant ones
out_df = df[['timestamp','season','treatment','co.flux', 'co2.flux','Tr','PAR','PAR_above_canopy','TL','TA','VPD','SWC','g_tw','g_tCO','co.ci']]

# Save full dataset
# NOTE(review): the input is read from project_path but both outputs are
# written relative to the working directory - confirm this is intended.
print('    - Saving to ' + output_fn)
out_df.to_csv(output_fn, index=False)

# Create summarised dataset
print('  - Summarising dataset to daily scale')

print('    - Removing unnecessary data before saving')
# Timestamps are parsed only now, so the full dataset above keeps them as text.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# df is already limited to status 'cc' and rain-free records, so those filters
# are not repeated. The period check is re-applied on the parsed timestamps and
# only branch-type records are kept.
in_period = (df['timestamp'] >= '2020-09-01 00:00') & (df['timestamp'] < '2021-09-01 00:00')
temp = df.loc[in_period & (df['type'] == 'branch')].copy()

# Time helpers: zero-padded day of year (string), clock time, decimal hour
temp['doy']      = temp['timestamp'].dt.strftime('%j')
temp['time']     = temp['timestamp'].dt.strftime('%H:%M')
temp['time_num'] = temp['timestamp'].dt.hour + temp['timestamp'].dt.minute/60

# Save daily file, midday medians
temp2 = temp.drop(columns=['type','time','chamber', 'status', 'rain','chamber_block'])
temp2 = temp2.loc[temp2['PAR_above_canopy'] > 50].copy() # Make sure we use only daytime with enough light
# Midday window: hours 11, 12, 13 and 14
temp2 = temp2.loc[(temp2['timestamp'].dt.hour > 10) & (temp2['timestamp'].dt.hour < 15)].copy()
# One row per season / day of year / treatment, holding the median of every
# remaining column
daily = temp2.groupby(['season', 'doy', 'treatment'], observed=True).median().reset_index()
daily_fn = output_summarised_fn + '_daily.csv'
print('    - Saving to ' + daily_fn)
daily.to_csv(daily_fn, index=False)

print('Done...')