# Table 1

This notebook loads the processed and combined data of the PV field, the afforestation, and their respective desert backgrounds, and calculates summer midday (10:00 - 15:00) means and standard deviations of Rn, H, LE, Lout, Lin, SWin, SWout, Ts, Ta, DT (column `D_T`), and rH.

In [1]:
import pandas as pd
import numpy as np
import glob
import os

### Input and output paths

In [2]:
# Folder of the notebook itself.
project_path = "./"
# Input and output currently point to the same data folder.
input_path = output_path = "../data/"

## Functions

In [3]:
def averaging(temp, start_hour=10, end_hour=15):
    """Mean and standard deviation per season and ecosystem within an hour window.

    Rows whose ``DateTime`` hour lies in the half-open interval
    ``[start_hour, end_hour)`` are kept, grouped by ``Season`` and
    ``Ecosystem``, and summarised with the mean and the standard deviation
    (pandas default, ``ddof=1``) of each variable listed below.

    Parameters
    ----------
    temp : pandas.DataFrame
        Must contain a datetime-like ``DateTime`` column, the grouping columns
        ``Season`` and ``Ecosystem``, and the variables ``H``, ``LE``, ``Rn``,
        ``Ta``, ``Ts``, ``D_T``, ``Pa``, ``H2O``, ``Lout``, ``Lin``, ``rho``,
        ``cp``, ``rH``, ``SWin`` and ``SWout``. Any other column is ignored.
    start_hour : int, optional
        First hour (inclusive) of the window. Defaults to 10 (mid-day).
    end_hour : int, optional
        Last hour (exclusive) of the window. Defaults to 15 (mid-day).
        For a night-time window use e.g. ``start_hour=0, end_hour=4``.

    Returns
    -------
    pandas.DataFrame
        One row per (Season, Ecosystem) group with the columns ``Season``,
        ``Ecosystem`` and ``<variable>_mean`` / ``<variable>_sd`` for every
        variable.

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``temp``.
    """
    group_keys = ['Season', 'Ecosystem']
    # The output column order below is kept for backward compatibility:
    # means of the core variables, then their standard deviations, then
    # mean/sd pairs of the remaining variables.
    core_vars = ['H', 'LE', 'Rn', 'Ta', 'Ts', 'D_T', 'Pa', 'H2O', 'Lout', 'Lin', 'rho', 'cp']
    paired_vars = ['rH', 'SWin', 'SWout']
    variables = core_vars + paired_vars

    # Keep only the requested hours of the day
    hours = temp['DateTime'].dt.hour
    window = temp.loc[(hours >= start_hour) & (hours < end_hour)]

    # uStar filter (currently disabled)
    #window = window.loc[window['uStar'] >= 0.2]

    # Aggregate only the listed variables. Aggregating the whole frame would
    # include text columns, which raises a TypeError in pandas >= 2.0.
    grouped = window.groupby(group_keys)[variables]
    means = grouped.mean().add_suffix('_mean')
    sds = grouped.std().add_suffix('_sd')
    merged = means.join(sds).reset_index()

    ordered_columns = (
        group_keys
        + [name + '_mean' for name in core_vars]
        + [name + '_sd' for name in core_vars]
        + [name + suffix for name in paired_vars for suffix in ('_mean', '_sd')]
    )
    return merged[ordered_columns]

## Load data

In [4]:
# Read the combined dataset and parse the timestamps as timezone-aware (UTC) datetimes
df = (
    pd.read_csv(input_path + 'dataset.csv')
      .assign(DateTime=lambda raw: pd.to_datetime(raw['DateTime'], format='%Y-%m-%d %H:%M:%S', utc=True))
)
display(df)

Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_merge,VPD,...,Ta,Lemitted,emissivity,PVe,Rn,Ts,D_T,cp,rho,rH
0,2019-07-10 00:30:00+00:00,July,2019,PV desert background,-18.79140,-2.500320,99081.800,25.52,305.52,3606.03,...,32.369995,423.670173,0.87,0.0,-92.40201,31.270731,-1.099264,1012.948384,1.083521,64.204776
1,2019-07-10 01:00:00+00:00,July,2019,PV desert background,-7.42804,-6.111050,99077.000,25.76,305.02,3494.33,...,31.869995,421.319794,0.87,0.0,-92.81302,30.847644,-1.022351,1012.790980,1.086618,151.468532
2,2019-07-10 01:30:00+00:00,July,2019,PV desert background,-16.05200,-3.831300,99064.600,24.80,304.67,3470.02,...,31.520020,419.386825,0.87,0.0,-93.99899,30.498366,-1.021653,1012.386117,1.088297,70.124219
3,2019-07-10 02:00:00+00:00,July,2019,PV desert background,-12.27440,8.551300,99052.400,24.92,304.17,3367.42,...,31.020020,417.317094,0.87,0.0,-94.19299,30.123034,-0.896985,1012.210480,1.091237,80.718851
4,2019-07-10 02:30:00+00:00,July,2019,PV desert background,-13.91630,-5.565570,99041.300,26.79,303.69,3194.86,...,30.540009,415.351335,0.87,0.0,-94.87900,29.765262,-0.774746,1012.480210,1.094591,61.698455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37931,2020-08-31 21:30:00+00:00,August,2020,Afforestation,-9.60000,-16.956398,93209.330,,,,...,23.680000,,,,,,,1021.611400,1.079329,
37932,2020-08-31 22:00:00+00:00,August,2020,Afforestation,-9.30000,-11.220856,93201.010,,,,...,23.443333,,,,,,,1021.600486,1.080576,
37933,2020-08-31 22:30:00+00:00,August,2020,Afforestation,-9.60000,22.113760,93188.625,,,,...,23.343333,,,,,,,1021.426533,1.080792,
37934,2020-08-31 23:00:00+00:00,August,2020,Afforestation,,,93180.766,,,,...,23.523333,,,,,,,1019.405333,1.077225,


In [5]:
# Work on a copy so the loaded data frame stays untouched
temp = df.copy()

# --- Quality control of rH -------------------------------------------------
# Discard rH when either |H| or |D_T| is too small (the mask is an OR)
temp.loc[(np.abs(temp['H']) < 5) | (np.abs(temp['D_T']) < 2), 'rH'] = np.nan
# Discard rH when |H| lies within +/- 2 of |D_T|
temp.loc[(np.abs(temp['H']) <= (np.abs(temp['D_T']) + 2)) &
         (np.abs(temp['H']) >= (np.abs(temp['D_T']) - 2)), 'rH'] = np.nan
# Negative rH is never valid. This also covers the negative values seen at
# night (SWin < 5), so no separate night-time filter is needed.
temp.loc[(temp['rH'] < 0), 'rH'] = np.nan

# --- Summary statistics ----------------------------------------------------
mean_df = averaging(temp)
# Only summer ends up in the table. Select it before formatting: the integer
# casts below raise on NaN, and a NaN in a discarded season must not abort the cell.
mean_df = mean_df.loc[mean_df['Season'] == 'Summer'].copy()

# Create a text of summarised values, 'mean (stddev)'.
# Each entry: (variable, number of decimals, cast to integer before printing)
summary_format = [
    ('Rn', 0, True), ('H', 0, True), ('LE', 0, True),
    ('Ta', 1, False), ('Ts', 1, False), ('D_T', 1, False),
    ('rH', 0, False),
    ('Lout', 0, True), ('Lin', 0, True), ('SWout', 0, True), ('SWin', 0, True),
]
for name, decimals, as_int in summary_format:
    mean_part = mean_df[name + '_mean'].astype(float).round(decimals)
    sd_part = mean_df[name + '_sd'].astype(float).round(decimals)
    if as_int:
        mean_part, sd_part = mean_part.astype(int), sd_part.astype(int)
    mean_df[name] = mean_part.astype(str) + ' (' + sd_part.astype(str) + ')'

# Keep only the grouping keys and the formatted texts (drops all *_mean / *_sd columns)
mean_df = mean_df[['Season', 'Ecosystem'] + [name for name, _, _ in summary_format]]

# Convert to long format: one row per parameter, one column per ecosystem
out_df = (
    mean_df.pivot(index='Season', columns='Ecosystem')
           .stack(level=[0])
           .reset_index()
           .drop(['Season'], axis=1)
           .rename(columns={'level_1': 'Parameter'})
)
display(out_df)

Ecosystem,Parameter,Afforestation,Afforestation desert background,PV desert background,PV field
0,D_T,3.9 (1.8),8.2 (3.0),9.1 (1.3),9.9 (2.5)
1,H,246 (114),134 (59),173 (37),178 (61)
2,LE,129 (56),122 (58),10 (8),12 (17)
3,Lin,309 (28),310 (21),346 (14),357 (20)
4,Lout,414 (28),452 (31),519 (13),507 (29)
5,Rn,470 (172),434 (109),344 (53),456 (97)
6,SWin,662 (237),778 (184),854 (100),770 (137)
7,SWout,81 (28),202 (48),338 (41),119 (21)
8,Ta,18.0 (4.8),18.2 (4.2),30.9 (2.3),28.3 (4.7)
9,Ts,21.6 (5.8),26.4 (5.4),39.9 (2.0),38.2 (4.5)
