# Table with for Ketura Science paper

The objective of this script is to automatically generate the radiative fluxes table of this paper, and export it to LaTeX, with the proper t-tests.

In [1]:
import pandas as pd
import openpyxl
import numpy as np
import glob
from plotnine import *
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
from scipy import signal
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
from scipy import stats
import os
from plotnine.data import mtcars
from functools import reduce
from itertools import product, combinations

### Input and output paths

In [2]:
project_path = 'D:/Users/Rafaels/Dropbox/Doutorado/Working/Ketura/'   # Weizmann desktop
#project_path = 'C:/Users/rafas/Dropbox/Doutorado/Working/Ketura/'     # notebook
input_path   = project_path + '01_data/'
output_path  = project_path + '02_output/'
graphs_path  = project_path + '03_graphs/'

# Functions

- Explanation for the calculation of $T_S$:
  - Long eq., i.e. what is measured by a longwave sensor, ignoring the air column: $L_{sensor} = \varepsilon L_{out} + (1 - \varepsilon) L_{in} = \varepsilon\sigma T^4_s + (1 - \varepsilon) L_{in}$
  - Solving the long eq. for $T_s$ yields: $ T_s = \sqrt[4]{\frac{L_{sensor} - (1 - \varepsilon) L_{in}}{\varepsilon~\sigma}} = \sqrt[4]{\frac{L_{sensor}}{\varepsilon\sigma} - \frac{L_{in}}{\varepsilon\sigma} + \frac{L_{in}}{\sigma} }$ (i.e. Thakur et al., 2021; eq. 7)

In [3]:
# Loads all data in the folder
def load_all_data(path):
    file_list = sorted(glob.glob(path + '*corr.csv', recursive=True))

    data_list = []
    for filename in file_list:
        fn_without_path = filename.replace(path[:-1], "")[1:]
        print("Loading", fn_without_path)
        
        temp = pd.read_csv(filename)
        temp['DateTime'] = pd.to_datetime(temp['DateTime'], format='%Y-%m-%d %H:%M:%S', utc=True)
       
        temp.loc[temp['Month'] == 'July', 'Season'] = 'Summer'
        temp.loc[temp['Month'] == 'August', 'Season'] = 'Summer'
        temp.loc[temp['Month'] == 'October', 'Season'] = 'Autumn'
        temp.loc[temp['Month'] == 'March', 'Season'] = 'Spring'
        temp.loc[temp['Ecosystem'] == 'Solar', 'Ecosystem'] = 'PV field'
        temp.loc[temp['Ecosystem'] == 'Desert', 'Ecosystem'] = 'Desert background'
        temp.loc[temp['Ecosystem'] == 'Yatir', 'Ecosystem'] = 'Yatir desert'
    
        data_list.append(temp)
    
    # Combine all the read data
    df = pd.concat(data_list, axis=0, ignore_index=True)
    
    # Rename
    df.rename(columns={'LW_IN_average_corr': 'Lin'}, inplace=True)
    df.rename(columns={'LW_OUT_average_corr': 'Lout'}, inplace=True)
    df.rename(columns={'SW_IN_average': 'SWin'}, inplace=True)
    df.rename(columns={'SW_OUT_average': 'SWout'}, inplace=True)
    df.rename(columns={'PA_average': 'Pa'}, inplace=True)
    
    # Convert columns to float
    df['H']  = pd.to_numeric(df['H'], downcast="float")
    df['LE'] = pd.to_numeric(df['LE'], downcast="float")
    df['Lin']  = pd.to_numeric(df['Lin'], downcast="float")
    df['Lout'] = pd.to_numeric(df['Lout'], downcast="float")
    df['SWin']  = pd.to_numeric(df['SWin'], downcast="float")
    df['SWout'] = pd.to_numeric(df['SWout'], downcast="float")
    df['H2O'] = pd.to_numeric(df['H2O'], downcast="float")
    df['Pa'] = pd.to_numeric(df['Pa'], downcast="float")
    df['Temperaturek']  = pd.to_numeric(df['TA_average'], downcast="float")
    
    df['Ta']  = df['Temperaturek'] - 273.15
    
    return(df)

# def rename_and_convert_mast(df):
    
#     # Rename
#     df.rename(columns={'LW_IN_average_corr': 'Lin'}, inplace=True)
#     df.rename(columns={'LW_OUT_average_corr': 'Lout'}, inplace=True)
#     df.rename(columns={'SW_IN_average': 'SWin'}, inplace=True)
#     df.rename(columns={'SW_OUT_average': 'SWout'}, inplace=True)
           
#     # Convert columns to float
#     df['H']  = pd.to_numeric(df['H'], downcast="float")
#     df['LE'] = pd.to_numeric(df['LE'], downcast="float")
#     df['Lin']  = pd.to_numeric(df['Lin'], downcast="float")
#     df['Lout'] = pd.to_numeric(df['Lout'], downcast="float")
#     df['SWin']  = pd.to_numeric(df['SWin'], downcast="float")
#     df['SWout'] = pd.to_numeric(df['SWout'], downcast="float")

#     # Calculating Rn
#     df['Rn'] = df['SWin'] - df['SWout'] + df['Lin'] - df['Lout']

#     # Remove PV field autumn and summer, because it will come from drone_df
#     #df.drop((df['Season'].isin(['Summer','Autumn'])) & (df['Ecosystem'] == 'PV field'))
    
#     # Keep only relevant columns
#     df = df[['DateTime','Season','Year','Ecosystem','H','LE','Rn']]
         
#     return(df)

def calculate_Ts(Lin, Lout, emissivity):
    # Calculating temperature from Lout
    # Constants
    sigma = 5.670374419*10**(-8) # Stefan-Boltzmann constant
    
    # Surface temperature [K]
    Ts_K = ((Lout - (1 - emissivity) * Lin) / (emissivity * sigma))**(1/4)
    Ts_C = Ts_K - 273.15
    
    return(Ts_C)

def calculate_Ts_simple(Lemitted, emissivity):
    # Calculating temperature from Lout
    # Constants
    sigma = 5.670374419*10**(-8) # Stefan-Boltzmann constant
    
    # Surface temperature [K]
    Ts_K = (Lemitted / (emissivity * sigma))**(1/4)
    Ts_C = Ts_K - 273.15
    
    return(Ts_C)

def rename_and_convert_drone(df):
    
    # Rename
    df.rename(columns={'LW_IN_average_corr': 'Lin_from_drone'}, inplace=True)
    df.rename(columns={'LWout_corr_bydrone': 'Lout_from_drone'}, inplace=True)
    df.rename(columns={'SW_IN_average': 'SWin'}, inplace=True)

       
    # Convert columns to float
    df['H']  = pd.to_numeric(df['H'], downcast="float")
    df['LE'] = pd.to_numeric(df['LE'], downcast="float")
    df['Lin_from_drone']  = pd.to_numeric(df['Lin_from_drone'], downcast="float")
    df['Lout_from_drone'] = pd.to_numeric(df['Lout_from_drone'], downcast="float")
    df['SWin']  = pd.to_numeric(df['SWin'], downcast="float")

    # Estimating SWout based on the fixed albedo value from March 2018 (0.23)
    df['SWout'] = df['SWin']*0.24
    df['PVeff'] = 0.058
    df['PVe'] = df['SWin'] * df['PVeff'] 

    # Calculating Rn
    df['Rn_from_drone'] = df['SWin'] - df['SWout'] + df['Lin_from_drone'] - df['Lout_from_drone']
    df['Rn_from_drone_pv'] = df['SWin'] - df['SWout'] + df['Lin_from_drone'] - df['Lout_from_drone'] - df['PVe']
    
    df['Ecosystem'] = 'PV field'
    df['Year'] = df['DateTime'].dt.year
    df['Month'] = df['DateTime'].dt.month

    
    # Keep only relevant columns
    df = df[['DateTime','Season','Year','Ecosystem','H','LE','Lout_from_drone','Rn_from_drone','Rn_from_drone_pv','PVeff','PVe','SWin']] 
         
    return(df)

def averaging(temp):
    
    # define mid-day
    temp = temp.loc[(temp['DateTime'].dt.hour >= 10) & (temp['DateTime'].dt.hour < 15)].copy()
    
    # define mid-night
    #temp = temp.loc[(temp['DateTime'].dt.hour >= 0) & (temp['DateTime'].dt.hour < 4)].copy()
    
    #print('before',len(temp.index))
    # uStar filter
    #temp = temp.loc[(temp['uStar'] >= 0.2)].copy()
    #print('after',len(temp.index))

    # Make mean and std dev
    df_means = temp.groupby(['Season','Ecosystem']).mean().reset_index()
    df_sds   = temp.groupby(['Season','Ecosystem']).std().reset_index()
    # rename columns
    df_means.rename(columns={'H': 'H_mean'}, inplace=True)
    df_means.rename(columns={'LE': 'LE_mean'}, inplace=True)
    df_means.rename(columns={'Rn': 'Rn_mean'}, inplace=True)
    df_means.rename(columns={'Ta': 'Ta_mean'}, inplace=True)
    df_means.rename(columns={'Ts': 'Ts_mean'}, inplace=True)
    df_means.rename(columns={'D_T': 'D_T_mean'}, inplace=True)
    df_means.rename(columns={'Pa': 'Pa_mean'}, inplace=True)
    df_means.rename(columns={'H2O': 'H2O_mean'}, inplace=True)
    df_means.rename(columns={'Lout': 'Lout_mean'}, inplace=True)
    df_means.rename(columns={'Lin': 'Lin_mean'}, inplace=True)
    df_means.rename(columns={'rho': 'rho_mean'}, inplace=True)
    df_means.rename(columns={'cp': 'cp_mean'}, inplace=True)
    df_means.rename(columns={'rH': 'rH_mean'}, inplace=True)

    df_sds.rename(columns={'H': 'H_sd'}, inplace=True)
    df_sds.rename(columns={'LE': 'LE_sd'}, inplace=True)
    df_sds.rename(columns={'Rn': 'Rn_sd'}, inplace=True)
    df_sds.rename(columns={'Ta': 'Ta_sd'}, inplace=True)
    df_sds.rename(columns={'Ts': 'Ts_sd'}, inplace=True)
    df_sds.rename(columns={'D_T': 'D_T_sd'}, inplace=True)
    df_sds.rename(columns={'Pa': 'Pa_sd'}, inplace=True)
    df_sds.rename(columns={'H2O': 'H2O_sd'}, inplace=True)
    df_sds.rename(columns={'Lout': 'Lout_sd'}, inplace=True)
    df_sds.rename(columns={'Lin': 'Lin_sd'}, inplace=True)
    df_sds.rename(columns={'rho': 'rho_sd'}, inplace=True)
    df_sds.rename(columns={'cp': 'cp_sd'}, inplace=True)
    df_sds.rename(columns={'rH': 'rH_sd'}, inplace=True)
    
    merged = df_means.merge(df_sds, on=['Season','Ecosystem'])
    
    # Keep only relevant columns
    merged = merged[['Season','Ecosystem','H_mean','LE_mean','Rn_mean','Ta_mean','Ts_mean','D_T_mean','Pa_mean','H2O_mean','Lout_mean','Lin_mean','rho_mean','cp_mean',\
                     'H_sd','LE_sd','Rn_sd','Ta_sd','Ts_sd','D_T_sd','Pa_sd','H2O_sd','Lout_sd','Lin_sd','rho_sd','cp_sd','rH_mean','rH_sd']]
    
    return(merged)

def load_tower(fn, silent=False):
    if (not silent): print('EC Tower')
    temp = pd.read_csv(fn, index_col=None)
    temp.rename({'date_mid_hour': 'DateTime'}, axis=1, inplace=True)
    temp['DateTime'] = pd.to_datetime(temp['DateTime'], format='%d%b%y:%H:%M', utc=True)
    # Remove obsolete columns
    temp.drop(['year','date','DOY','month','weekNo','mid_hour','mmyy','Bat_V','Hum_AC'], axis=1, inplace=True)
    if (not silent): print("    ", '100.0 %\t', fn.split('/')[-1])
    return(temp)


# def diurnal(temp,Ecosystem,Season):
    
#     temp = temp.loc[(temp['Ecosystem'] == Ecosystem) & (temp['Season'] == Season)].copy()
    
#     # creating 'Time' column
#     temp['Time'] = temp['DateTime'].dt.strftime('%H:%M')
    
#     # Keep only relevant columns
#     temp = temp[['Time','Ecosystem','Lout']]
    
#     # Make mean and std dev
#     df_means = temp.groupby(['Ecosystem','Time']).mean().reset_index()
#     df_sds   = temp.groupby(['Ecosystem','Time']).std().reset_index()
#     # rename columns
#     df_means.rename(columns={'Lout': 'Lout_mean'}, inplace=True)
#     df_means.rename(columns={'Lin': 'Lin_mean'}, inplace=True)
#     df_means.rename(columns={'Sout': 'Sout_mean'}, inplace=True)
#     df_means.rename(columns={'Sin': 'Sin_mean'}, inplace=True)
#     df_means.rename(columns={'PARout': 'PARout_mean'}, inplace=True)
#     df_means.rename(columns={'PARin': 'PARin_mean'}, inplace=True)
    
#     df_sds.rename(columns={'Lout': 'Lout_sd'}, inplace=True)
#     df_sds.rename(columns={'Lin': 'Lin_sd'}, inplace=True)
#     df_sds.rename(columns={'Sout': 'Sout_sd'}, inplace=True)
#     df_sds.rename(columns={'Sin': 'Sin_sd'}, inplace=True)
#     df_sds.rename(columns={'PARout': 'PARout_sd'}, inplace=True)
#     df_sds.rename(columns={'PARin': 'PARin_sd'}, inplace=True)
    
#     merged = df_means.merge(df_sds, on=['Ecosystem','Time'])
    
#     return(merged)

def pvalue_text(p):
    if(p <= 0.001): p_text = '<.001'
    if(p > 0.001): p_text = '<.01'
    if(p > 0.01): p_text = '<.05'
    if(p > 0.05): p_text = p.round(2).astype(str)
    return(p_text)

def ttest_all(temp, category, list_of_test_cols, group_col):
    # Prepare name of categories to test against each other
    group1 = temp[group_col].unique()[0]
    group2 = temp[group_col].unique()[1]
    print('Testing', group1, '&', group2)
    
    # Prepare df for results
    out_df = pd.DataFrame(list(product(list_of_test_cols, temp[category].dropna().unique())), columns=['Parameter', category])
    out_df['p'] = np.nan
    
    # Do t-tests for all combinations
    for col in list_of_test_cols:
        #print('----')
        #print(col)
        for cat in temp[category].dropna().unique():
            #print(cat)
            a = temp.loc[(temp[group_col] == group1) & (temp[category] == cat),col]
            b = temp.loc[(temp[group_col] == group2) & (temp[category] == cat),col]
            t = stats.ttest_ind(a, b, equal_var=False, nan_policy='omit') # Welch t-test for inequal variances
            print(col, cat, ':', pvalue_text(t[1]))
            # Add data to resulting df
            out_df.loc[(out_df[category] == cat) & (out_df['Parameter'] == col), 'p'] = pvalue_text(t[1])
    
    return(out_df)

def ttest_all2(temp, categories_to_test, list_data_cols, test_type='independent'):
    # Prepare list of lists
    list_of_lists = []
    for cat_i, cat in enumerate(categories_to_test):
        sub_categories = list(temp[cat].unique())
        list_of_lists.append(sub_categories)
    print('All categories:', list_of_lists)
    
    # Count number of categories
    cat_count = len(categories_to_test)
    list_of_dfs = []
    
    # Go through the list of all data columns
    for data_col in list_data_cols:
        print("Testing data column: ", data_col)
        
        # Prepare empty list of lists to fill
        out_list = []
        # List all combinations, and go through
        for i in list(combinations( list(product(*list_of_lists)) , 2)):
            # If no x-1 elements (for 3 categories, that's 2) overlaps, skip
            # This makes sure not to test combinations where everything is different
            # (e.g. Autumn Pines Hamsin vs Spring Maquis Normal)
            if(sum([j in i[0] for j in i[1]]) < (cat_count-1)):
                continue
            #print(i)  # DEBUG
            # Prepare row to append data
            row_list = []
            a_conditions = []
            b_conditions = []
            # Check which elements are the same in each pair
            for j in range(cat_count):
                if(i[0][j] == i[1][j]):
                    #print('Identical columns:', i[0][j]) # DEBUG
                    row_list.append(i[0][j])
                else:
                    #print('Column to test:', categories_to_test[j]) # DEBUG
                    row_list.append('')
                # Create list of conditions
                a_conditions.append(temp[categories_to_test[j]] == i[0][j])
                b_conditions.append(temp[categories_to_test[j]] == i[1][j])
            # Create the text of which 2 variables are being tested against each other
            test_str = list(set(i[0]) - set(i[1]))[0] + ' vs. ' + list(set(i[1]) - set(i[0]))[0]
            row_list.append(test_str)
            # Prepare t-test
            a = temp.loc[reduce(np.logical_and, a_conditions), data_col]
            b = temp.loc[reduce(np.logical_and, b_conditions), data_col]
            if(test_type == 'independent'):
                t = stats.ttest_ind(a, b, equal_var=False, nan_policy='omit') # Welch t-test for inequal variances
            else:
                t = stats.ttest_rel(a, b, nan_policy='omit') # Paired t-test
            #row_list.append(t[1]) # Full P value
            row_list.append(pvalue_text(t[1])) # P value as text
            # Add p values to final output
            out_list.append(row_list)
    
        # Column names
        colnames = categories_to_test + ['Test','P_'+data_col]
        out_df = pd.DataFrame(out_list, columns=colnames)
        list_of_dfs.append(out_df)
    
    # Finally merge all
    final_df = reduce(lambda df1,df2: pd.merge(df1,df2, on=categories_to_test + ['Test']), list_of_dfs)
    print('Done...')
    
    return(final_df)

In [4]:
# Calculate saturation vapour pressure from pressure and temperature
# - 2 methods are available. Jones uses air pressure, Campbell & Norman do not
def calculate_es(T_C, P_Pa):
    # Jones p.348 (appendix 4)
    #es = (1.00072+(10**(-7)*P_Pa*(0.032+5.9*10**(-6)*T_C**2))) * (611.21*np.exp( (18.678-(T_C/234.5))*T_C/(257.14+T_C) ))

    # Eddypro manual: https://www.licor.com/env/support/EddyPro/topics/calculate-micromet-variables.html
    # Campbell & Norman (1998)
    T_K = T_C + 273.15
    es = T_K**(-8.2) * np.exp(77.345 + 0.0057*T_K - 7235 * T_K**(-1))
    return(es)

# Converts water concentration [mmol.mol] to RH [%]
def convert_mmol_RH(T_C, h2o_mmol_mol, P_Pa):
    
    T_K = T_C + 273.15
    #es = calculate_es(T_C, P_Pa)
    #RH <- 0.263*P_Pa*((h2o_mmol_mol*18.02/28.97)/1000)*np.exp(17.67*(T_C)/(T_K-29.65))**(-1)
    #RH = 100 if (RH > 100) else RH
    #RH = np.nan if (RH < 5) else RH

    # From Eddypro manual: https://www.licor.com/env/support/EddyPro/topics/calculate-micromet-variables.html
    R  = 8.314463                  # Ideal gas constant (J K-1 mol-1)
    M_d   = 0.02897                # molecular weights of dry air (kg mol-1)
    M_h2o = 0.01802                # molecular weights of water vapour (kg mol-1)
    R_h2o = R / M_h2o              # Water vapor gas constant (J K-1 mol-1)
    es = calculate_es(T_C, P_Pa)   # Water vapor partial pressure at saturation (Pa)
    P_d = P_Pa - es                # Dry air partial pressure (P_d, P_a)
    rho_d = P_d / (R / M_d * T_K)  # Dry air mass density (rho_d, kg m-3)
    v_d = M_d / rho_d              # Dry air molar volume (vd, m3 mol-1)
    v_a = v_d * P_d/P_Pa           # Air molar volume (vd, m3mol-1) 
    rho_h2o = h2o_mmol_mol/1000 * M_h2o / v_a # Water vapor mole fraction
    e  = rho_h2o * R_h2o * T_K     # Water vapor partial pressure (Pa) 
    RH = e/es * 100                # RH (%)
    return(RH)

# Density of dry air
# - https://en.wikipedia.org/wiki/Density_of_air
def calculate_rho_dry_air(T_C, P_Pa):
    # Constants
    R_dry_air = 287.058     # [J/(kg·K)] Specific gas const dry air
    # Calculations
    T_K = T_C + 273.15
    rho_dry_air = P_Pa / (R_dry_air * T_K) # Density of dry air (use for approximation)
    return(rho_dry_air)

# Density of moist air
def calculate_rho_moist_air(T_C, h2o_mmol_mol, P_Pa):
    # Temperature in K
    T_K = T_C + 273.15

    # Preparations
    R     = 8.314463             # Ideal gas constant (J K-1 mol-1)
    M_d   = 0.02897              # molecular weights of dry air (kg mol-1)
    M_h2o = 0.01802              # molecular weights of water vapour (kg mol-1)
    es = calculate_es(T_C, P_Pa) # Saturation vapour pressure (Pa)
    P_d = P_Pa - es              # Dry air partial pressure (P_d, P_a)
    rho_d = P_d / (R / M_d * T_K) # Dry air mass density (rho_d, kg m-3)
    v_d = M_d / rho_d            # Dry air molar volume (vd, m3 mol-1)
    v_a = v_d * P_d/P_Pa         # Air molar volume (vd, m3mol-1) 
    rho_h2o = h2o_mmol_mol/1000 * M_h2o / v_a # Water vapor mole fraction

    # Moist air mass density (ρa, kg m-3) 
    rho_air = rho_d + rho_h2o

    return(rho_air)

# Dry air heat capacity at constant pressure
# cp_d in [J kg-1 K-1]
 # https://www.licor.com/env/support/EddyPro/topics/calculate-micromet-variables.html
def calculate_cp_dry_air(T_C):
    cp = 1005 + ((T_C + 23.12)**2)/3364
    return(cp)

# Specific heat capacity of moist air at constant pressure
# cp_m in [J kg-1 K-1]
# https://www.licor.com/env/support/EddyPro/topics/calculate-micromet-variables.html
def calculate_cp_moist_air(T_C, h2o_mmol_mol, P_Pa):
    # Temperature in K
    T_K = T_C + 273.15

    # RH
    RH = convert_mmol_RH(T_C, h2o_mmol_mol, P_Pa)

    # Water vapor heat capacity at constant pressure (cp_h2o, J kg-1 K-1)
    cp_h2o = 1859 + 0.13*RH + (0.193 + 5.6*10**(-3) * RH)*T_C + (10**(-3) + 5 * 10**(-5)*RH)*T_C**2

    # Preparations
    R     = 8.314463             # Ideal gas constant (J K-1 mol-1)
    M_d   = 0.02897              # molecular weights of dry air (kg mol-1)
    M_h2o = 0.01802              # molecular weights of water vapour (kg mol-1)
    es = calculate_es(T_C, P_Pa) # Saturation vapour pressure (Pa)
    P_d = P_Pa - es              # Dry air partial pressure (P_d, P_a)
    rho_d = P_d / (R / M_d * T_K) # Dry air mass density (rho_d, kg m-3)
    v_d = M_d / rho_d            # Dry air molar volume (vd, m3 mol-1)
    v_a = v_d * P_d/P_Pa         # Air molar volume (vd, m3mol-1) 
    rho_h2o = h2o_mmol_mol/1000 * M_h2o / v_a # Water vapor mole fraction

    # Moist air mass density (ρa, kg m-3) 
    rho_air = rho_d + rho_h2o

    # Specific humidity (Q, kg kg-1) 
    Q = rho_h2o / rho_air

    # cp_moist
    cp = calculate_cp_dry_air(T_C) * (1-Q) + cp_h2o * Q
    return(cp)

def calculate_Lemitted(Lsensor, Lin, emissivity):
    Lemitted = Lsensor - (1 - emissivity)*Lin
    return(Lemitted)

def calculate_Lout_from_Lemitted(Lemitted, Lin, emissivity):
    Lout = Lemitted + (1 - emissivity)*Lin
    return(Lout)

# Calculates Rn
# If PVe=0 is given, it is calculated without it (i.e., it isn't a PV field)
def calculate_Rn(swin, swout, lwin, lwout, pve=0):
    # Calculate Rn
    Rn = swin - swout + lwin - lwout - pve
    return(Rn)

## Load Ketura and Yatir desert data

In [5]:
# Load data
mast_df = load_all_data(input_path)

# Prepare empty Lemitted and emissivity columns
mast_df['Lemitted'] = np.nan
mast_df['emissivity'] = np.nan
# Add emissivity
mast_df.loc[mast_df['Ecosystem'] == 'Desert background', 'emissivity'] = 0.87
mast_df.loc[mast_df['Ecosystem'] == 'PV field', 'emissivity'] = 0.85 # considering that Irina's measurements for 30 degrees inclination of the panels were 0.83 and 0.84. Therefore, 0.85 would be a good mean for 50% panels (0.83-0.84) and 50% soil (0.87) cover of the field
mast_df.loc[mast_df['Ecosystem'] == 'Yatir desert', 'emissivity'] = 0.88
# Fix mast Lout
mast_df['Lemitted'] = calculate_Lemitted(mast_df['Lout'], mast_df['Lin'], mast_df['emissivity']) # Replace the already corrected ones
# NOTE: PV field values in Summer & Autumn need to be replaced using the drone data

#mast_df = mast_df.loc[mast_df['Season'] == 'Spring'].copy()

# Remove the bad 22nd of march 2018
mast_df = mast_df.loc[(mast_df['DateTime'] <= '2018-03-21 23:59') | (mast_df['DateTime'] >= '2018-03-23 00:00')].copy()

display(mast_df)
display(mast_df.columns.values)

Loading Ketura_all_corr.csv
Loading Yatir_desert_all_corr.csv


Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_average,VPD,...,Lout,albedo,WS_average,Tsonic,uStar,Season,Temperaturek,Ta,Lemitted,emissivity
0,2019-07-09 20:30:00+00:00,July,2019,Desert background,76.646301,-4.342080,99048.000000,25.66,308.80,4320.580,...,503.072601,inf,4.55307,315.005,0.496929,Summer,308.799988,35.649994,450.111511,0.87
1,2019-07-09 21:00:00+00:00,July,2019,Desert background,-14.566100,,99097.601562,21.92,308.22,4395.130,...,497.234985,,4.95904,313.243,0.550489,Summer,308.220001,35.070007,445.194817,0.87
2,2019-07-09 21:30:00+00:00,July,2019,Desert background,-9.589140,20.623699,99121.000000,21.71,307.68,4277.140,...,492.450989,,4.61193,312.346,0.320482,Summer,307.679993,34.529999,440.941607,0.87
3,2019-07-09 22:00:00+00:00,July,2019,Desert background,-20.194000,-16.165701,99136.203125,21.65,307.27,4184.020,...,488.117004,,2.83385,311.566,0.290064,Summer,307.269989,34.119995,436.965123,0.87
4,2019-07-09 22:30:00+00:00,July,2019,Desert background,-12.366500,-1.476210,99127.101562,22.36,306.90,4061.500,...,484.428009,,2.12721,311.007,0.254991,Summer,306.899994,33.750000,433.521960,0.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2872,2015-08-29 23:00:00+00:00,August,2015,Yatir desert,1.393080,0.442754,94044.703125,78.37,294.03,484.134,...,417.607605,inf,1.25712,296.517,,Summer,294.029999,20.880005,378.014567,0.88
2873,2015-08-29 23:30:00+00:00,August,2015,Yatir desert,-0.722983,-17.577499,94035.000000,78.92,293.74,461.235,...,415.586853,inf,1.99739,296.324,,Summer,293.739990,20.589996,375.924213,0.88
2874,2015-08-30 00:00:00+00:00,August,2015,Yatir desert,-1.056220,-6.041320,94012.796875,80.00,293.82,421.278,...,415.492798,inf,1.76369,295.862,,Summer,293.820007,20.670013,375.683757,0.88
2875,2015-08-30 00:30:00+00:00,August,2015,Yatir desert,-5.523640,-17.121500,93996.703125,82.70,293.60,368.748,...,412.988922,inf,2.49107,295.832,,Summer,293.600006,20.450012,373.121683,0.88


array(['DateTime', 'Month', 'Year', 'Ecosystem', 'H', 'LE', 'Pa', 'RH',
       'TA_average', 'VPD', 'H2O', 'PARin', 'PARout', 'SWin', 'SWout',
       'Lin', 'Lout', 'albedo', 'WS_average', 'Tsonic', 'uStar', 'Season',
       'Temperaturek', 'Ta', 'Lemitted', 'emissivity'], dtype=object)

## Load drone data

In [6]:
drone_df = pd.read_csv(input_path + "final_truck_LWout_corrected.csv") 
drone_df['DateTime'] = pd.to_datetime(drone_df['DateTime'], format='%Y-%m-%d %H:%M:%S', utc=True)
drone_df['Month'] = drone_df['DateTime'].dt.month

drone_df.loc[drone_df['Month'] == 7, 'Season'] = 'Summer'
drone_df.loc[drone_df['Month'] == 10, 'Season'] = 'Autumn'
drone_df.loc[drone_df['Month'] == 11, 'Season'] = 'Autumn'

drone_df = rename_and_convert_drone(drone_df)

display (drone_df)

Unnamed: 0,DateTime,Season,Year,Ecosystem,H,LE,Lout_from_drone,Rn_from_drone,Rn_from_drone_pv,PVeff,PVe,SWin
0,2019-07-16 14:30:00+00:00,Summer,2019,PV field,304.852997,28.705700,598.822998,498.863770,445.999032,0.058,52.864738,911.460999
1,2019-07-16 15:00:00+00:00,Summer,2019,PV field,366.420990,44.638302,593.456543,453.898193,405.017823,0.058,48.880371,842.765015
2,2019-07-16 15:30:00+00:00,Summer,2019,PV field,291.721985,16.261700,591.434753,394.885010,350.857615,0.058,44.027395,759.093018
3,2019-07-16 16:00:00+00:00,Summer,2019,PV field,314.447998,20.247400,583.401672,333.348328,294.866489,0.058,38.481839,663.479980
4,2019-07-16 16:30:00+00:00,Summer,2019,PV field,208.761002,20.630100,574.480713,267.213013,234.491210,0.058,32.721802,564.169006
...,...,...,...,...,...,...,...,...,...,...,...,...
746,2018-11-01 05:30:00+00:00,Autumn,2018,PV field,,,457.498688,-128.382385,-128.415275,0.058,0.032890,0.567066
747,2018-11-01 06:00:00+00:00,Autumn,2018,PV field,,,458.583893,-102.791687,-104.875296,0.058,2.083609,35.924301
748,2018-11-01 06:30:00+00:00,Autumn,2018,PV field,,,462.913452,-39.581940,-46.854328,0.058,7.272388,125.386002
749,2018-11-01 07:00:00+00:00,Autumn,2018,PV field,,,462.260071,30.065399,17.745963,0.058,12.319436,212.404068


## Combine mast and drone data

In [7]:
# Merge
all_df = mast_df.merge(drone_df[['DateTime','Season','Ecosystem','Lout_from_drone']], how='outer', on=['DateTime','Season','Ecosystem'])

# Replace Lemitted (mast) from drone in Summer & Atumn in the PV field only
all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'Lout'] = \
                   all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'Lout_from_drone']

# Calculate Lout from the drone Lemitted
all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'Lemitted'] = calculate_Lemitted( \
                   all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'Lout_from_drone'], \
                   all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'Lin'], \
                   all_df.loc[((all_df['Season'].isin(['Summer','Autumn'])) & (all_df['Ecosystem'] == 'PV field')), 'emissivity'])

# Provide values of PV efficiency, or 0 if it's not PV
PV_eff = 0.058
all_df['PVe'] = all_df['SWin'] * PV_eff
all_df.loc[all_df['Ecosystem'] != 'PV field', 'PVe'] = 0 # Remove it not PV field
# Rn calculation
all_df['Rn'] = calculate_Rn(all_df['SWin'], all_df['SWout'], all_df['Lin'], all_df['Lout'], all_df['PVe'])

# Clean up
all_df.drop('Lout_from_drone', axis=1, inplace=True)

display (all_df)

Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_average,VPD,...,WS_average,Tsonic,uStar,Season,Temperaturek,Ta,Lemitted,emissivity,PVe,Rn
0,2019-07-09 20:30:00+00:00,July,2019,Desert background,76.646301,-4.342080,99048.000000,25.66,308.80,4320.580,...,4.55307,315.005,0.496929,Summer,308.799988,35.649994,450.111511,0.87,0.0,-95.718048
1,2019-07-09 21:00:00+00:00,July,2019,Desert background,-14.566100,,99097.601562,21.92,308.22,4395.130,...,4.95904,313.243,0.550489,Summer,308.220001,35.070007,445.194817,0.87,0.0,-96.925995
2,2019-07-09 21:30:00+00:00,July,2019,Desert background,-9.589140,20.623699,99121.000000,21.71,307.68,4277.140,...,4.61193,312.346,0.320482,Summer,307.679993,34.529999,440.941607,0.87,0.0,-96.224976
3,2019-07-09 22:00:00+00:00,July,2019,Desert background,-20.194000,-16.165701,99136.203125,21.65,307.27,4184.020,...,2.83385,311.566,0.290064,Summer,307.269989,34.119995,436.965123,0.87,0.0,-94.640991
4,2019-07-09 22:30:00+00:00,July,2019,Desert background,-12.366500,-1.476210,99127.101562,22.36,306.90,4061.500,...,2.12721,311.007,0.254991,Summer,306.899994,33.750000,433.521960,0.87,0.0,-92.843018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2824,2015-08-29 23:00:00+00:00,August,2015,Yatir desert,1.393080,0.442754,94044.703125,78.37,294.03,484.134,...,1.25712,296.517,,Summer,294.029999,20.880005,378.014567,0.88,0.0,-87.705322
2825,2015-08-29 23:30:00+00:00,August,2015,Yatir desert,-0.722983,-17.577499,94035.000000,78.92,293.74,461.235,...,1.99739,296.324,,Summer,293.739990,20.589996,375.924213,0.88,0.0,-85.080994
2826,2015-08-30 00:00:00+00:00,August,2015,Yatir desert,-1.056220,-6.041320,94012.796875,80.00,293.82,421.278,...,1.76369,295.862,,Summer,293.820007,20.670013,375.683757,0.88,0.0,-83.771210
2827,2015-08-30 00:30:00+00:00,August,2015,Yatir desert,-5.523640,-17.121500,93996.703125,82.70,293.60,368.748,...,2.49107,295.832,,Summer,293.600006,20.450012,373.121683,0.88,0.0,-80.769409


# Calculate $T_s$

In [8]:
all_df['Ts'] = calculate_Ts_simple(all_df['Lemitted'], all_df['emissivity']) # All campaigns
display(all_df)

all_df.to_csv(output_path + 'Ketura_with_Ts.csv')

temp = all_df
temp['Time'] = temp['DateTime'].dt.strftime('%H:%M')

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'PV field') & (temp['Season'] == 'Summer')),'Ts']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'Desert background') & (temp['Season'] == 'Summer')),'Ts']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Ts Summer desert vs. PV , mid-day: P =', pvalue_text(t[1]))

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'PV field') & (temp['Season'] == 'Summer')),'Lout']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'Desert background') & (temp['Season'] == 'Summer')),'Lout']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Lout Summer desert vs. PV , mid-day: P =', pvalue_text(t[1]))

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'PV field') & (temp['Season'] == 'Spring')),'Ts']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00') & (temp['Ecosystem'] == 'Desert background') & (temp['Season'] == 'Spring')),'Ts']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Ts Spring desert vs. PV , mid-day: P =', pvalue_text(t[1]))

Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_average,VPD,...,Tsonic,uStar,Season,Temperaturek,Ta,Lemitted,emissivity,PVe,Rn,Ts
0,2019-07-09 20:30:00+00:00,July,2019,Desert background,76.646301,-4.342080,99048.000000,25.66,308.80,4320.580,...,315.005,0.496929,Summer,308.799988,35.649994,450.111511,0.87,0.0,-95.718048,35.913190
1,2019-07-09 21:00:00+00:00,July,2019,Desert background,-14.566100,,99097.601562,21.92,308.22,4395.130,...,313.243,0.550489,Summer,308.220001,35.070007,445.194817,0.87,0.0,-96.925995,35.065714
2,2019-07-09 21:30:00+00:00,July,2019,Desert background,-9.589140,20.623699,99121.000000,21.71,307.68,4277.140,...,312.346,0.320482,Summer,307.679993,34.529999,440.941607,0.87,0.0,-96.224976,34.326921
3,2019-07-09 22:00:00+00:00,July,2019,Desert background,-20.194000,-16.165701,99136.203125,21.65,307.27,4184.020,...,311.566,0.290064,Summer,307.269989,34.119995,436.965123,0.87,0.0,-94.640991,33.631344
4,2019-07-09 22:30:00+00:00,July,2019,Desert background,-12.366500,-1.476210,99127.101562,22.36,306.90,4061.500,...,311.007,0.254991,Summer,306.899994,33.750000,433.521960,0.87,0.0,-92.843018,33.025213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2824,2015-08-29 23:00:00+00:00,August,2015,Yatir desert,1.393080,0.442754,94044.703125,78.37,294.03,484.134,...,296.517,,Summer,294.029999,20.880005,378.014567,0.88,0.0,-87.705322,21.871408
2825,2015-08-29 23:30:00+00:00,August,2015,Yatir desert,-0.722983,-17.577499,94035.000000,78.92,293.74,461.235,...,296.324,,Summer,293.739990,20.589996,375.924213,0.88,0.0,-85.080994,21.462706
2826,2015-08-30 00:00:00+00:00,August,2015,Yatir desert,-1.056220,-6.041320,94012.796875,80.00,293.82,421.278,...,295.862,,Summer,293.820007,20.670013,375.683757,0.88,0.0,-83.771210,21.415583
2827,2015-08-30 00:30:00+00:00,August,2015,Yatir desert,-5.523640,-17.121500,93996.703125,82.70,293.60,368.748,...,295.832,,Summer,293.600006,20.450012,373.121683,0.88,0.0,-80.769409,20.912077


Ts Summer desert vs. PV , mid-day: P = 0.75
Lout Summer desert vs. PV , mid-day: P = 0.81
Ts Spring desert vs. PV , mid-day: P = 0.41


In [9]:
#all_df = all_df.loc[all_df['Season'] == 'Summer'].merge(drone_t_df[['DateTime','Season','Ecosystem','Ts_PV_field']], on=['DateTime','Season','Ecosystem'], how='left')
all_df['D_T'] = all_df['Ts'] - all_df['Ta']
all_df = all_df.loc[all_df['Season'] == 'Summer'].copy()
display(all_df)

Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_average,VPD,...,Season,Temperaturek,Ta,Lemitted,emissivity,PVe,Rn,Ts,Time,D_T
0,2019-07-09 20:30:00+00:00,July,2019,Desert background,76.646301,-4.342080,99048.000000,25.66,308.80,4320.580,...,Summer,308.799988,35.649994,450.111511,0.87,0.0,-95.718048,35.913190,20:30,0.263196
1,2019-07-09 21:00:00+00:00,July,2019,Desert background,-14.566100,,99097.601562,21.92,308.22,4395.130,...,Summer,308.220001,35.070007,445.194817,0.87,0.0,-96.925995,35.065714,21:00,-0.004293
2,2019-07-09 21:30:00+00:00,July,2019,Desert background,-9.589140,20.623699,99121.000000,21.71,307.68,4277.140,...,Summer,307.679993,34.529999,440.941607,0.87,0.0,-96.224976,34.326921,21:30,-0.203078
3,2019-07-09 22:00:00+00:00,July,2019,Desert background,-20.194000,-16.165701,99136.203125,21.65,307.27,4184.020,...,Summer,307.269989,34.119995,436.965123,0.87,0.0,-94.640991,33.631344,22:00,-0.488651
4,2019-07-09 22:30:00+00:00,July,2019,Desert background,-12.366500,-1.476210,99127.101562,22.36,306.90,4061.500,...,Summer,306.899994,33.750000,433.521960,0.87,0.0,-92.843018,33.025213,22:30,-0.724787
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2824,2015-08-29 23:00:00+00:00,August,2015,Yatir desert,1.393080,0.442754,94044.703125,78.37,294.03,484.134,...,Summer,294.029999,20.880005,378.014567,0.88,0.0,-87.705322,21.871408,23:00,0.991403
2825,2015-08-29 23:30:00+00:00,August,2015,Yatir desert,-0.722983,-17.577499,94035.000000,78.92,293.74,461.235,...,Summer,293.739990,20.589996,375.924213,0.88,0.0,-85.080994,21.462706,23:30,0.872709
2826,2015-08-30 00:00:00+00:00,August,2015,Yatir desert,-1.056220,-6.041320,94012.796875,80.00,293.82,421.278,...,Summer,293.820007,20.670013,375.683757,0.88,0.0,-83.771210,21.415583,00:00,0.745570
2827,2015-08-30 00:30:00+00:00,August,2015,Yatir desert,-5.523640,-17.121500,93996.703125,82.70,293.60,368.748,...,Summer,293.600006,20.450012,373.121683,0.88,0.0,-80.769409,20.912077,00:30,0.462064


## Loading Yatir data

In [10]:
# Using Augusts of 2013-2020
yatir_df = load_tower(input_path + 'Yatir_2000-2020.csv')
yatir_df = yatir_df.loc[(yatir_df['DateTime'].dt.year >= 2013)].copy()
yatir_df = yatir_df.loc[(yatir_df['DateTime'].dt.month == 8)].copy()
yatir_df['DateTime'] = yatir_df['DateTime'] - pd.Timedelta(minutes=15)

# Rename
yatir_df.rename(columns={'S_top_atm(CM21_IV)_Wm-2': 'Sin'}, inplace=True)
yatir_df.rename(columns={'S_top_eco(CM21_III)_Wm-2': 'Sout'}, inplace=True)
yatir_df.rename(columns={'PAR_top_atm(IV)_umol_m-2s-1': 'PARin'}, inplace=True)
yatir_df.rename(columns={'PAR_top_eco(III)_umol_m-2s-1': 'PARout'}, inplace=True)
yatir_df.rename(columns={'L_top_atm(PIR_IV)_Wm-2': 'Lin'}, inplace=True)
yatir_df.rename(columns={'L_top_eco(PIR_III)_Wm-2': 'Lout'}, inplace=True)
yatir_df.rename(columns={'Fc_Cor_umol_m-2s-1': 'NEE'}, inplace=True)
yatir_df.rename(columns={'LE_Wm-2_Avg': 'LE'}, inplace=True)
yatir_df.rename(columns={'H_Wm-2': 'H'}, inplace=True)
yatir_df.rename(columns={'H2O_Con_mmol_mol-1': 'H2O'}, inplace=True)
yatir_df.rename(columns={'AirPress_Pa': 'Pa'}, inplace=True)
yatir_df.rename(columns={'Ustar_ms-1': 'uStar'}, inplace=True)

# yatir_df.rename(columns={'date_mid_hour': 'DateTime'}, inplace=True)

# yatir_df['DateTime'] = pd.to_datetime(yatir_df['DateTime'], format='%Y-%m-%d %H:%M:%S', utc=True)

# Convert columns to float
yatir_df['Sin']  = pd.to_numeric(yatir_df['Sin'], downcast="float")
yatir_df['Sout']  = pd.to_numeric(yatir_df['Sout'], downcast="float")
yatir_df['PARin']  = pd.to_numeric(yatir_df['PARin'], downcast="float")
yatir_df['PARout']  = pd.to_numeric(yatir_df['PARout'], downcast="float")
yatir_df['Lin']  = pd.to_numeric(yatir_df['Lin'], downcast="float")
yatir_df['Lout']  = pd.to_numeric(yatir_df['Lout'], downcast="float")
yatir_df['NEE']  = pd.to_numeric(yatir_df['NEE'], downcast="float")
yatir_df['LE']  = pd.to_numeric(yatir_df['LE'], downcast="float")
yatir_df['H']  = pd.to_numeric(yatir_df['H'], downcast="float")
yatir_df['H2O']  = pd.to_numeric(yatir_df['H2O'], downcast="float")
yatir_df['Pa']  = pd.to_numeric(yatir_df['Pa'], downcast="float")
yatir_df['uStar']  = pd.to_numeric(yatir_df['uStar'], downcast="float")

# Correct Lout for the emissivity
yatir_df['emissivity'] = 0.873  # Thakur 2021
yatir_df['Lemitted'] = calculate_Lemitted(yatir_df['Lout'], yatir_df['Lin'], yatir_df['emissivity'])
    
# Calculate Rn
yatir_df['albedo'] = yatir_df['Sout']/yatir_df['Sin']
yatir_df['Rn'] = yatir_df['Sin'] - yatir_df['Sout'] + yatir_df['Lin'] - yatir_df['Lout']


# Make air temperature mean of top of tower, according to the system used in 
def mean_tower_temp(temp, T_col_list):
    # Prepare data
    temp = temp[['DateTime'] + T_col_list].copy() # timestamp added for debugging only
    # Calculate the mean
    temp['T_mean'] = temp[T_col_list].mean(axis=1)
    # Check each column to see if the difference from the mean is > 2°C. If so, remove
    for column in T_col_list:
        temp.loc[np.abs(temp[column] - temp['T_mean']) > 2, column] = np.nan
    # Re-calculate the mean from the remaining data
    temp['T_mean'] = temp[T_col_list].mean(axis=1)
        
    return(temp['T_mean'])

# Make the mean air temperature,
# remove any value where one of them is more than 2°C off from the mean,
# and then calculate the mean again from the remaining data
yatir_df['Ta'] = mean_tower_temp(yatir_df, ['T 15m Vaisala_C', 'Prof_Tc_13m_C', 'Prof_Tc_15m_C', 'T_PIR_III_K'])

yatir_df['Ts'] = calculate_Ts_simple(yatir_df['Lemitted'], yatir_df['emissivity'])

yatir_df['D_T'] = yatir_df['Ts'] - yatir_df['Ta']

yatir_df['Season'] = 'Summer'
yatir_df['Ecosystem'] = 'Yatir'

# Keep only relevant columns
yatir_df = yatir_df[['DateTime','Season','Ecosystem','H','LE','Rn','Ta','Ts','Pa','H2O','D_T','uStar','Lout','Lemitted','Lin']]

display(yatir_df)
print('Done...')

EC Tower




     100.0 %	 Yatir_2000-2020.csv


Unnamed: 0,DateTime,Season,Ecosystem,H,LE,Rn,Ta,Ts,Pa,H2O,D_T,uStar,Lout,Lemitted,Lin
232320,2013-08-01 00:00:00+00:00,Summer,Yatir,-5.500000,2.174060,,,,93580.132812,8.900000,,0.12,,,
232321,2013-08-01 00:30:00+00:00,Summer,Yatir,-18.700001,-0.482240,,,,93580.132812,8.800000,,0.14,,,
232322,2013-08-01 01:00:00+00:00,Summer,Yatir,-11.000000,1.876140,,,,93580.132812,8.900000,,0.13,,,
232323,2013-08-01 01:30:00+00:00,Summer,Yatir,-21.200001,45.402561,,,,93580.132812,10.000000,,0.14,,,
232324,2013-08-01 02:00:00+00:00,Summer,Yatir,-18.000000,19.784941,,,,93580.132812,9.100000,,0.12,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356539,2020-08-31 21:30:00+00:00,Summer,Yatir,-9.600000,-16.956398,,23.680000,,93209.328125,28.600000,,0.33,,,
356540,2020-08-31 22:00:00+00:00,Summer,Yatir,-9.300000,-11.220856,,23.443333,,93201.007812,28.600000,,0.21,,,
356541,2020-08-31 22:30:00+00:00,Summer,Yatir,-9.600000,22.113760,,23.343333,,93188.625000,28.299999,,0.13,,,
356542,2020-08-31 23:00:00+00:00,Summer,Yatir,,,,23.523333,,93180.765625,24.700001,,0.09,,,


Done...


In [11]:
def daily_maxima(temp, var, group_var=[]):
    temp = temp.copy()
    # Create an identifier for each day
    temp['day'] = temp['DateTime'].dt.strftime('%Y-%m-%d')
    
    full_group_var = ['day'] + group_var
    
    # Calculate the maxima
    daily = temp[full_group_var + [var]].groupby(full_group_var).max()
    daily.reset_index(inplace=True)
    
    daily.dropna(subset=[var], inplace=True)
    
    return(daily)

yatir_vs_desert_df = yatir_df[['DateTime','Ts']].merge(all_df[['DateTime','Ts']], how='inner', on='DateTime')
yatir_vs_desert_df['dTs_desfor'] = yatir_vs_desert_df['Ts_y'] - yatir_vs_desert_df['Ts_x']

# Mean of the daily maxima of each campaign
#display(final_df)
maxima_df = daily_maxima(yatir_vs_desert_df,'dTs_desfor')
display(maxima_df.agg(['mean','std']))
#maxima_final = maxima_df.groupby(['Ecosystem']).agg(['mean','std'])
#display(maxima_final)

Unnamed: 0,dTs_desfor
mean,7.572976
std,3.402375


## t-test Ts Yatir vs. desert

In [12]:
temp = yatir_df[['DateTime','Ts','Lout']].merge(all_df[['DateTime','Ts','Lout']], how='inner', on='DateTime')
temp.rename(columns={'Ts_x': 'Ts_yatir'}, inplace=True)
temp.rename(columns={'Ts_y': 'Ts_yatir_desert'}, inplace=True)
temp.rename(columns={'Lout_x': 'Lout_yatir'}, inplace=True)
temp.rename(columns={'Lout_y': 'Lout_yatir_desert'}, inplace=True)
temp = temp.loc[~temp['Ts_yatir'].isna() & ~temp['Ts_yatir_desert'].isna()].copy()

display(temp)

temp['Time'] = temp['DateTime'].dt.strftime('%H:%M')

# non-paired t-test

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Ts_yatir']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Ts_yatir_desert']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Ts Summer, mid-day: P =', pvalue_text(t[1]))

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Lout_yatir']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Lout_yatir_desert']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Lout Summer, mid-day: P =', pvalue_text(t[1]))

Unnamed: 0,DateTime,Ts_yatir,Lout_yatir,Ts_yatir_desert,Lout_yatir_desert
39,2013-08-22 10:00:00+00:00,36.598583,498.799988,44.744018,552.888245
40,2013-08-22 10:30:00+00:00,37.663946,505.000000,45.596381,558.704468
41,2013-08-22 11:00:00+00:00,37.753604,505.799988,46.757659,566.539001
42,2013-08-22 11:30:00+00:00,37.628398,506.299988,47.945194,574.268921
43,2013-08-22 12:00:00+00:00,37.749589,506.500000,48.268913,577.268494
...,...,...,...,...,...
1001,2015-08-29 10:30:00+00:00,37.497735,504.799988,41.469897,531.663452
1002,2015-08-29 11:00:00+00:00,38.245630,509.700012,42.701033,539.836914
1003,2015-08-29 11:30:00+00:00,38.545717,511.700012,43.755524,547.063721
1004,2015-08-29 12:00:00+00:00,39.310027,517.099976,43.765044,547.334717


Ts Summer, mid-day: P = <.001
Lout Summer, mid-day: P = <.001


## t-test Ta Yatir vs. desert

In [13]:
temp = yatir_df[['DateTime','Ta']].merge(all_df[['DateTime','Ta']], how='inner', on='DateTime')
temp.rename(columns={'Ta_x': 'Ta_yatir'}, inplace=True)
temp.rename(columns={'Ta_y': 'Ta_yatir_desert'}, inplace=True)
temp = temp.loc[~temp['Ta_yatir'].isna() & ~temp['Ta_yatir_desert'].isna()].copy()

display(temp)

temp['Time'] = temp['DateTime'].dt.strftime('%H:%M')

# non-paired t-test

a = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Ta_yatir']
b = temp.loc[((temp['Time'] >= '10:00') | (temp['Time'] < '15:00')),'Ta_yatir_desert']
t = stats.ttest_ind(a, b, nan_policy='omit')
print('Ta Summer, mid-day: P =', pvalue_text(t[1]))

Unnamed: 0,DateTime,Ta_yatir,Ta_yatir_desert
39,2013-08-22 10:00:00+00:00,28.8300,28.770020
40,2013-08-22 10:30:00+00:00,29.5125,29.100006
41,2013-08-22 11:00:00+00:00,29.6000,29.770020
42,2013-08-22 11:30:00+00:00,29.5225,29.920013
43,2013-08-22 12:00:00+00:00,30.1600,29.890015
...,...,...,...
1026,2015-08-29 23:00:00+00:00,19.5150,20.880005
1027,2015-08-29 23:30:00+00:00,19.1850,20.589996
1028,2015-08-30 00:00:00+00:00,18.7800,20.670013
1029,2015-08-30 00:30:00+00:00,19.0050,20.450012


Ta Summer, mid-day: P = <.01


## Merge Yatir & Ktura

In [14]:
final_df = pd.concat([all_df, yatir_df])

# Heat capacity of air [J kg-1 K-1]
final_df['cp'] = calculate_cp_moist_air(final_df['Ta'], final_df['H2O'], final_df['Pa'])

# Density of air [kg m-3]
final_df['rho'] = calculate_rho_moist_air(final_df['Ta'], final_df['H2O'], final_df['Pa']) 

# Calculate resistance [s m-1]
final_df['rH'] = final_df['rho'] * final_df['cp'] * final_df['D_T'] / final_df['H']

display(final_df)

Unnamed: 0,DateTime,Month,Year,Ecosystem,H,LE,Pa,RH,TA_average,VPD,...,Lemitted,emissivity,PVe,Rn,Ts,Time,D_T,cp,rho,rH
0,2019-07-09 20:30:00+00:00,July,2019.0,Desert background,76.646301,-4.342080,99048.000000,25.66,308.80,4320.58,...,450.111511,0.87,0.0,-95.718048,35.913190,20:30,0.263196,1014.607124,1.062477,3.701734
1,2019-07-09 21:00:00+00:00,July,2019.0,Desert background,-14.566100,,99097.601562,21.92,308.22,4395.13,...,445.194817,0.87,0.0,-96.925995,35.065714,21:00,-0.004293,1013.086205,1.065295,0.318072
2,2019-07-09 21:30:00+00:00,July,2019.0,Desert background,-9.589140,20.623699,99121.000000,21.71,307.68,4277.14,...,440.941607,0.87,0.0,-96.224976,34.326921,21:30,-0.203078,1012.779334,1.068970,22.927897
3,2019-07-09 22:00:00+00:00,July,2019.0,Desert background,-20.194000,-16.165701,99136.203125,21.65,307.27,4184.02,...,436.965123,0.87,0.0,-94.640991,33.631344,22:00,-0.488651,1012.584170,1.071753,26.260454
4,2019-07-09 22:30:00+00:00,July,2019.0,Desert background,-12.366500,-1.476210,99127.101562,22.36,306.90,4061.50,...,433.521960,0.87,0.0,-92.843018,33.025213,22:30,-0.724787,1012.642274,1.074275,63.758091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356539,2020-08-31 21:30:00+00:00,,,Yatir,-9.600000,-16.956398,93209.328125,,,,...,,,,,,,,1021.611400,1.079329,
356540,2020-08-31 22:00:00+00:00,,,Yatir,-9.300000,-11.220856,93201.007812,,,,...,,,,,,,,1021.600486,1.080576,
356541,2020-08-31 22:30:00+00:00,,,Yatir,-9.600000,22.113760,93188.625000,,,,...,,,,,,,,1021.426533,1.080792,
356542,2020-08-31 23:00:00+00:00,,,Yatir,,,93180.765625,,,,...,,,,,,,,1019.405333,1.077225,


In [15]:
ts_max_yatir = final_df.loc[(final_df['Ecosystem'] == 'Yatir'),'Ts'].max()
ts_max_yatir_desert = final_df.loc[(final_df['Ecosystem'] == 'Yatir desert'),'Ts'].max()
display(ts_max_yatir)
display(ts_max_yatir_desert)

46.21149985273911

49.00546882639878

In [16]:
mean_df = averaging(final_df)

# final_df[['Ecosystem','rho','cp']].agg(['mean','std'])

# print('Rho:', np.round(np.mean(final_df['rho']),2), '±', np.round(np.std(final_df['rho']),2))
# print('Cp: ', np.round(np.mean(final_df['cp']),0), '±', np.round(np.std(final_df['cp']),0))

# Calculate resistance [s m-1]
#mean_df['rH'] = mean_df['rho_mean'] * mean_df['cp_mean'] * mean_df['D_T_mean'] / mean_df['H_mean']
#mean_df['rH_sd'] = ((mean_df['Ts_sd'] **2 + mean_df['Ta_sd'] **2)/(mean_df['Ts_mean'] - mean_df['Ta_mean']) **2  + (mean_df['H_sd'] / mean_df['H_mean']) **2 ) **0.5


# Create a text of summarised values (mean + stddev)
mean_df['Rn'] = mean_df['Rn_mean'].astype(float).round(0).astype(int).astype(str) + ' (' + mean_df['Rn_sd'].astype(float).round(0).astype(int).astype(str) + ')'
mean_df['H'] = mean_df['H_mean'].astype(float).round(0).astype(int).astype(str) + ' (' + mean_df['H_sd'].astype(float).round(0).astype(int).astype(str) + ')'
mean_df['LE'] = mean_df['LE_mean'].astype(float).round(0).astype(int).astype(str) + ' (' + mean_df['LE_sd'].astype(float).round(0).astype(int).astype(str) + ')'
mean_df['Ta'] = mean_df['Ta_mean'].astype(float).round(1).astype(str) + ' (' + mean_df['Ta_sd'].round(1).astype(str) + ')'
mean_df['Ts'] = mean_df['Ts_mean'].astype(float).round(1).astype(str) + ' (' + mean_df['Ts_sd'].round(1).astype(str) + ')'
mean_df['D_T'] = mean_df['D_T_mean'].astype(float).round(1).astype(str) + ' (' + mean_df['D_T_sd'].round(1).astype(str) + ')'
mean_df['rH'] = mean_df['rH_mean'].astype(float).round(0).astype(str) + ' (' + mean_df['rH_sd'].astype(float).round(0).astype(str) + ')'
mean_df['Lout'] = mean_df['Lout_mean'].astype(float).round(0).astype(int).astype(str) + ' (' + mean_df['Lout_sd'].astype(float).round(0).astype(int).astype(str) + ')'
mean_df['Lin'] = mean_df['Lin_mean'].astype(float).round(0).astype(int).astype(str) + ' (' + mean_df['Lin_sd'].astype(float).round(0).astype(int).astype(str) + ')'

# Remove the original values
mean_df.drop(['H_mean','H_sd','LE_mean','LE_sd','Rn_mean','Rn_sd','Ta_mean','Ta_sd','Ts_mean','Ts_sd','D_T_mean','D_T_sd','Pa_mean','rho_mean','cp_mean',\
              'Pa_sd','H2O_mean','H2O_sd','Lout_mean','Lout_sd','Lin_mean','Lin_sd','rho_sd','cp_sd','rH_mean','rH_sd'], axis=1, inplace=True)
#display(mean_df)

# Convert to long format
out_df = mean_df.pivot(index='Season', columns='Ecosystem').stack(level=[0])
out_df.reset_index(inplace=True)
out_df.drop(['Season'], axis=1, inplace=True)
out_df.rename(columns={'level_1': 'Parameter'}, inplace=True)
display(out_df)

out_df.to_latex(output_path + 'Ketura_Science_resistance_new.tex', index=False)

Ecosystem,Parameter,Desert background,PV field,Yatir,Yatir desert
0,D_T,12.4 (1.2),13.3 (1.7),6.3 (1.0),14.5 (2.1)
1,H,199 (50),344 (85),522 (97),225 (51)
2,LE,34 (22),37 (22),43 (48),34 (42)
3,Lin,392 (13),407 (20),367 (17),382 (15)
4,Lout,586 (26),587 (21),505 (15),550 (17)
5,Rn,410 (35),408 (42),650 (89),452 (71)
6,Ta,37.2 (3.0),36.8 (3.6),30.9 (2.3),29.3 (1.9)
7,Ts,49.6 (3.7),50.1 (2.9),37.1 (2.5),43.9 (2.5)
8,rH,70.0 (15.0),44.0 (13.0),13.0 (3.0),74.0 (25.0)


# Until here

# t-tests diurnal each half-hour

In [115]:
temp = final_df.copy()
temp['Time'] = final_df['DateTime'].dt.strftime('%H:%M')

# Make mean and std dev
df_means = temp.groupby(['Ecosystem','Season','Time']).mean().reset_index()
df_sds   = temp.groupby(['Ecosystem','Season','Time']).std().reset_index()

# rename columns
df_means.rename(columns={'Ts': 'Ts_mean'}, inplace=True)
    
df_sds.rename(columns={'Ts': 'Ts_sd'}, inplace=True)

all_df_diurnal = df_means.merge(df_sds, on=['Ecosystem','Season','Time'])
all_df_diurnal['Season'] = pd.Categorical(all_df_diurnal['Season'], ordered=True, categories=['Spring','Summer','Autumn'])
# Remove the original values
#all_df_diurnal.drop(['Ts_x','Ts_y'], axis=1, inplace=True)

display (all_df_diurnal)

Unnamed: 0,Ecosystem,Season,Time,Year_x,H_x,LE_x,Pa_x,RH_x,TA_average_x,VPD_x,...,albedo_y,WS_average_y,Tsonic_y,uStar_y,Temperaturek_y,Ta_y,Rn_y,emissivity_y,Ts_sd,D_T_y
0,Desert background,Summer,00:00,2019.000000,-11.648523,-2.331297,99092.250000,32.931667,305.580000,3287.933333,...,,0.802158,1.896457,0.071351,1.618608,1.618608,4.280043,0.0,1.173980,0.629194
1,Desert background,Summer,00:30,2019.000000,-8.371357,1.410252,99090.984375,33.215000,305.178333,3206.563333,...,,0.836196,1.947653,0.093552,1.720000,1.720000,5.959974,0.0,1.210147,0.705045
2,Desert background,Summer,01:00,2019.000000,-10.930252,-3.472808,99093.296875,33.865000,304.691667,3089.243333,...,,0.706200,2.072591,0.058648,1.652391,1.652391,6.999988,0.0,1.202715,0.618712
3,Desert background,Summer,01:30,2019.000000,-12.147190,-5.824137,99081.234375,34.380000,304.263333,2993.798333,...,,0.506268,1.985226,0.054291,1.624136,1.624136,7.317245,0.0,1.187288,0.597114
4,Desert background,Summer,02:00,2019.000000,-13.098312,-6.957472,99073.531250,34.941667,303.818333,2893.090000,...,,0.665763,1.949679,0.054838,1.594995,1.594995,6.977234,0.0,1.165803,0.579438
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Yatir desert,Summer,21:30,2014.181818,-3.069034,-1.753356,94671.117188,73.627895,295.756364,784.209364,...,,1.525143,1.556921,,1.128248,1.128248,10.694774,0.0,1.248237,0.938691
188,Yatir desert,Summer,22:00,2014.181818,-4.037160,5.000449,94667.609375,73.385556,295.519545,763.733864,...,,1.476142,1.539936,,1.124276,1.124276,9.210844,0.0,1.308833,0.972105
189,Yatir desert,Summer,22:30,2014.181818,-6.518318,0.973837,94662.656250,73.255556,295.343182,734.045227,...,,1.421431,1.573115,,1.127966,1.127966,9.480728,0.0,1.431796,1.083453
190,Yatir desert,Summer,23:00,2014.217391,-5.852935,1.686053,94621.328125,68.647000,295.396087,856.118739,...,,1.544867,2.683319,,2.114021,2.114021,9.144975,0.0,2.243644,1.170079


# t-tests seasonal means

In [74]:
p_df = ttest_all2(final_df.loc[~final_df['Season'].isna()], ['Season', 'Ecosystem'], ['Ts'])
display(p_df)

All categories: [['Summer'], ['Desert background', 'PV field', 'Yatir desert', 'Yatir']]
Testing data column:  Ts
Done...


Unnamed: 0,Season,Ecosystem,Test,P_Ts
0,Summer,,Desert background vs. PV field,<.001
1,Summer,,Desert background vs. Yatir desert,<.001
2,Summer,,Desert background vs. Yatir,<.001
3,Summer,,PV field vs. Yatir desert,<.001
4,Summer,,PV field vs. Yatir,<.001
5,Summer,,Yatir desert vs. Yatir,<.001


## Lout night values

In [None]:
# t-test

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Autumn, night: P =', pvalue_text(t[1]))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Spring, night: P =', np.round(t[1],2))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Summer, night: P =', pvalue_text(t[1]))

## Lin night values

In [None]:
a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Autumn, night: P =', pvalue_text(t[1]))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Spring, night: P =', np.round(t[1],2))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '20:00') | (all_df_diurnal['Time'] < '06:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Summer, night: P =', pvalue_text(t[1]))

## Lout mid-day values

In [None]:
a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
#t = stats.ttest_ind(a, b, equal_var=False, nan_policy='omit') # Welch t-test for inequal variances
print('Autumn, mid-day: P =', pvalue_text(t[1]))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Spring, mid-day: P =', np.round(t[1],2))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lout_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
#t = stats.ttest_ind(a, b, equal_var=False, nan_policy='omit') # Welch t-test for inequal variances
print('Summer, mid-day: P =', pvalue_text(t[1]))

display(all_df_diurnal)

## $L_{in}$ day values

In [None]:
a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Autumn') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Autumn, night: P =', pvalue_text(t[1]))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Spring') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Spring, night: P =', np.round(t[1],2))

a = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'Desert background') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
b = all_df_diurnal.loc[(all_df_diurnal['Ecosystem'] == 'PV field') & (all_df_diurnal['Season'] == 'Summer') & ((all_df_diurnal['Time'] >= '10:00') & (all_df_diurnal['Time'] < '15:00')),'Lin_mean']
t = stats.ttest_rel(a, b, nan_policy='omit')
print('Summer, night: P =', pvalue_text(t[1]))

# Graphs

# Lout

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='Lout_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='Lout_mean - Lout_sd', ymax='Lout_mean + Lout_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$L_{out}\; (W \; m^{-2}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'lout_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'lout_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

# Lin

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='Lin_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='Lin_mean - Lin_sd', ymax='Lin_mean + Lin_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$L_{in}\; (W \; m^{-2}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'lin_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'lin_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

# Sin

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='Sin_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='Sin_mean - Sin_sd', ymax='Sin_mean + Sin_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$S_{in}\; (W \; m^{-2}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'Sin_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'Sin_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

# Sout

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='Sout_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='Sout_mean - Sout_sd', ymax='Sout_mean + Sout_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$S_{out}\; (W \; m^{-2}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'Sout_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'Sout_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

# PARin

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='PARin_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='PARin_mean - Sin_sd', ymax='PARin_mean + PARin_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$PAR_{in}\; (\mu mol \; m^{-2} \; s^{-1}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'PARin_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'PARin_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

# PARout

In [None]:
cbPalette = ["#02000B", "#2D09DE", "#DE090F", "#80ff80", "#c2c2d6", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Convert the times back to a “fake” timestamp:
all_df_diurnal['timestamp2'] = pd.to_datetime(all_df_diurnal['Time'], utc=True)

plt = ggplot(all_df_diurnal)
plt = plt + geom_line(aes(x='timestamp2', y='PARout_mean',linetype='Ecosystem'))
plt = plt + geom_ribbon(aes(x='timestamp2', ymin='PARout_mean - PARout_sd', ymax='PARout_mean + PARout_sd', linetype='Ecosystem'), alpha=0.1)
plt = plt + labs(x='Hour', y='$PAR_{out}\; (\mu mol \; m^{-2} \; s^{-1}$)', parse=True)
plt = plt + facet_wrap(['Season'])
#plt = plt + scale_colour_manual(values=cbPalette) + scale_fill_manual(values=cbPalette)
plt = plt + theme_bw()
plt = plt + theme(axis_text_x=element_text(size=9,rotation=30,hjust=0.5,weight='bold'),
                  axis_title_x = element_blank(),
                  axis_text_y=element_text(size=9,weight='bold'),
                  strip_text=element_text(size=9,weight='bold'),
                  legend_title=element_blank(),
                  text=element_text(family="serif"), axis_ticks_direction_y='in', axis_ticks_direction_x='in')
plt = plt + theme(legend_position = 'top')
plt = plt + scale_x_datetime(date_breaks = '6 hours', date_labels = '%H:%M')

plt.save(graphs_path + 'PARout_diurnal.pdf', width=19, height=7, units='cm', scale=1.3, dpi=600)
plt.save(graphs_path + 'PARout_diurnal.png', width=19, height=7, units='cm', scale=1.3, dpi=600)


plt

In [None]:
diurnal_Lout_desert_march2018_df = diurnal(all_df, 'Desert', 'March')
diurnal_Lout_desert_march2018_df.to_csv(output_path + 'diurnal_Lout_desert_march2018.csv')

diurnal_Lout_pv_march2018_df = diurnal(all_df, 'Solar', 'March')
diurnal_Lout_pv_march2018_df.to_csv(output_path + 'diurnal_Lout_pv_march2018.csv')

diurnal_Lout_desert_october2018_df = diurnal(all_df, 'Desert', 'October')
diurnal_Lout_desert_october2018_df.to_csv(output_path + 'diurnal_Lout_desert_october2018.csv')

diurnal_Lout_desert_july2019_df = diurnal(all_df, 'Desert', 'July')
diurnal_Lout_desert_july2019_df.to_csv(output_path + 'diurnal_Lout_desert_july2019.csv') 

diurnal_Lout_pv_october2018_df = diurnal(all_df, 'Solar', 10)
diurnal_Lout_pv_october2018_df.to_csv(output_path + 'diurnal_Lout_pv_october2018.csv')

diurnal_Lout_pv_july2019_df = diurnal(all_df, 'Solar', 7)
diurnal_Lout_pv_july2019_df.to_csv(output_path + 'diurnal_Lout_pv_july2019.csv') 