# Comparison of meteo conditions

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import glob
import re

from plotnine import *
from mizani.breaks import date_breaks
from mizani.formatters import date_format
# Colours
cbPalette = ['#939393', '#0072B2', '#E69F00', '#CC00CC', '#009E73', '#D55E00', '#CC79A7', '#FF3300', '#F0E442', '#56B4E9']

In [None]:
# Root of the project; the data and graph locations below are derived from it
project_path = './'

# Input data directory and output directory for the figures
data_path = f'{project_path}../data/'
graphs_path = f'{project_path}../graphs/'

# Ketura campaigns: truck data file and Yotvata meteo station directory
truck_ket_fn = f'{data_path}Ketura_all_corr.csv'
meteo_ket_fn = f'{data_path}Yotvata/'

# Yatir desert campaigns: truck data directory
truck_yat_fn = f'{data_path}Yatir desert/'
# Here we use the Yatir tower data as a reference
# (note: this path is NOT derived from project_path/data_path)
meteo_yat_fn = '../../data/towerSAS/Yatir_2000-2020.csv'

### Functions

In [None]:
def load_tower(fn, silent=False):
    """Read a tower CSV file and return it as a DataFrame.

    The 'date_mid_hour' column is renamed to 'DateTime' and parsed with the
    format '%d%b%y:%H:%M'. A fixed set of bookkeeping columns is dropped.

    Parameters:
        fn: Path of the CSV file (anything pandas.read_csv accepts; a string
            path is required when progress output is enabled).
        silent: If True, no progress messages are printed.

    Returns:
        The loaded DataFrame.
    """
    verbose = not silent
    if verbose:
        print('  -', fn.split('/')[-1])

    # Columns that are not needed once the timestamp has been parsed
    obsolete_columns = ['year', 'date', 'DOY', 'month', 'weekNo',
                        'mid_hour', 'mmyy', 'Bat_V', 'Hum_AC']

    tower = pd.read_csv(fn, index_col=None)
    tower = tower.rename(columns={'date_mid_hour': 'DateTime'})
    tower['DateTime'] = pd.to_datetime(tower['DateTime'], format='%d%b%y:%H:%M')
    tower = tower.drop(columns=obsolete_columns)

    if verbose:
        print("    ", '100.0 %\t', fn.split('/')[-1])
    return tower

def load_truck(fn, index_col=False, silent=False):
    """Read a truck CSV file and return it as a DataFrame.

    The 'date_time' column is renamed to 'DateTime', parsed with the format
    '%Y-%m-%d %H:%M:%S' and shifted forward by 15 minutes.

    Parameters:
        fn: Path of the CSV file (anything pandas.read_csv accepts; a string
            path is required when progress output is enabled).
        index_col: Passed through to pandas.read_csv.
        silent: If True, the file name is not printed.

    Returns:
        The loaded DataFrame with the shifted 'DateTime' column.
    """
    if not silent:
        print('  -', fn.split('/')[-1])

    truck = pd.read_csv(fn, index_col=index_col).rename(columns={'date_time': 'DateTime'})
    parsed = pd.to_datetime(truck['DateTime'], format='%Y-%m-%d %H:%M:%S')
    # Shift by 15 min (presumably to the middle of the half-hour, as done
    # for the other datasets in this file; confirm against the raw data)
    truck['DateTime'] = parsed + pd.Timedelta(minutes=15)
    return truck

def load_yatir_desert(directory, silent=False):
    """Load and combine all '*fixed.xls' workbooks found below a directory.

    For every workbook the sheets 'derived (Data)' (gas data) and
    'biomet (Data)' (radiation data) are read, skipping the first two rows,
    and outer-merged on the 'DateTime' and 'time' columns. All workbooks are
    then concatenated and the timestamps shifted by 15 minutes to mark the
    middle of the half-hour.

    Parameters:
        directory: Directory prefix that is searched recursively. It is
            concatenated directly with the glob pattern, so it should end
            with a path separator.
        silent: If True, no progress messages are printed.

    Returns:
        One DataFrame with the data of all workbooks.

    Raises:
        ValueError: If no matching workbook is found.
    """
    if not silent:
        print('  - Yatir desert:')

    file_list = sorted(glob.glob(directory + '**/*fixed.xls', recursive=True))
    if not file_list:
        # Fail with a clear message instead of pandas' "No objects to concatenate"
        raise ValueError("No '*fixed.xls' files found below " + repr(directory))

    data_list = []
    for filename in file_list:
        # Per-file progress output also respects the silent flag
        if not silent:
            print('    -', filename.split('/')[-1])
        # Load gas data
        gas = pd.read_excel(filename, sheet_name='derived (Data)', skiprows=2)
        gas['DateTime'] = pd.to_datetime(gas['DateTime'], format='%Y-%m-%d %H:%M:%S')
        # Load radiation data
        biomet = pd.read_excel(filename, sheet_name='biomet (Data)', skiprows=2)
        biomet['DateTime'] = pd.to_datetime(biomet['DateTime'], format='%Y-%m-%d %H:%M:%S')
        # Merge; the outer join keeps rows present in only one of the sheets
        data_list.append(gas.merge(biomet, on=['DateTime', 'time'], how='outer'))

    # Combine all the read data
    df = pd.concat(data_list, axis=0, ignore_index=True)
    # Make the timestamp the middle of the halfhour
    df['DateTime'] = df['DateTime'] + pd.Timedelta(minutes=15)
    return df

def load_meteo_stn(directory, silent=False):
    """Load all meteo station CSV files below a directory as half-hourly means.

    All '*.csv' files found recursively are concatenated. The six columns are
    renamed to 'station', 'DateTime', 'Eg_Wm2', 'RH_perc', 'Ta_C' and 'u_ms',
    the timestamps are parsed with the format '%d/%m/%Y %H:%M' and the four
    measurement columns are converted to numbers (unparsable entries become
    NaN). The data are then averaged over 30 min bins and the timestamps
    shifted by 15 minutes to mark the middle of the half-hour.

    Only numeric columns are averaged, so a text 'station' column is not part
    of the result.

    Parameters:
        directory: Directory prefix that is searched recursively. It is
            concatenated directly with the glob pattern, so it should end
            with a path separator.
        silent: If True, no progress messages are printed.

    Returns:
        DataFrame with a 'DateTime' column and the half-hourly means.

    Raises:
        ValueError: If no CSV file is found, or if the files do not have
            exactly six columns.
    """
    if not silent:
        print('  - Meteo station:')

    file_list = sorted(glob.glob(directory + '**/*.csv', recursive=True))
    if not file_list:
        # Fail with a clear message instead of pandas' "No objects to concatenate"
        raise ValueError("No '*.csv' files found below " + repr(directory))

    data_list = []
    for filename in file_list:
        # Per-file progress output also respects the silent flag
        if not silent:
            print('    -', filename.split('/')[-1])
        data_list.append(pd.read_csv(filename))

    # Combine all the read data
    df = pd.concat(data_list, axis=0, ignore_index=True)
    # Fix problems
    df.columns = ['station', 'DateTime', 'Eg_Wm2', 'RH_perc', 'Ta_C', 'u_ms']
    df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d/%m/%Y %H:%M')
    # Convert to numbers
    for column in ['Eg_Wm2', 'RH_perc', 'Ta_C', 'u_ms']:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Average 10min Yotvata data to half-hours.
    # Restrict to numeric columns first: averaging a text column such as
    # 'station' raises a TypeError in pandas >= 2.0.
    numeric = df.set_index('DateTime').select_dtypes(include='number')
    df = numeric.resample('30min').mean().reset_index()
    df['DateTime'] = df['DateTime'] + pd.Timedelta(minutes=15)
    return df

def pvalue_text(p):
    """Format a p-value as a short text label.

    Parameters:
        p: The p-value as a plain Python float or a numpy scalar.

    Returns:
        '<.001' for p <= 0.001, '<.01' for p <= 0.01, '<.05' for p <= 0.05,
        otherwise the value rounded to two decimals as a string
        (e.g. '0.07'). A NaN input yields 'nan'.
    """
    if p <= 0.001:
        return '<.001'
    if p <= 0.01:
        return '<.01'
    if p <= 0.05:
        return '<.05'
    # np.round works for Python floats and numpy scalars alike (the float
    # type has no .round()/.astype() methods). NaN fails every comparison
    # above and ends up here as 'nan'.
    return str(np.round(p, 2))

def add_season_info(DateTime, season_type='normal'):
    """Assign a season name to every timestamp of a datetime Series.

    Seasons are defined by day-of-year windows written as month*100 + day
    (e.g. 1207 is 7 December).

    Parameters:
        DateTime: pandas Series of datetimes.
        season_type: 'high' selects the narrower windows, which leave gaps
            between the seasons; any other value selects the windows that
            cover the whole year.

    Returns:
        Series with the same index as DateTime holding 'Winter', 'Spring',
        'Summer' or 'Autumn'; NaN where the timestamp falls into no window
        or is missing (NaT).
    """
    # Use the index of the input so the masks below align for any index, and
    # dtype=object so that writing strings does not have to upcast a float Series
    season = pd.Series(np.nan, index=DateTime.index, dtype=object)
    # Numeric month-day key, e.g. 31 March -> 331. NaT gives NaN, which
    # fails every comparison and therefore stays without a season.
    timestamp = DateTime.dt.month * 100 + DateTime.dt.day
    # Fill in the dates
    if season_type == 'high':
        season.loc[(timestamp >= 117) & (timestamp <= 302)] = 'Winter'
        season.loc[(timestamp >= 402) & (timestamp <= 430)] = 'Spring'
        season.loc[(timestamp >= 627) & (timestamp <= 922)] = 'Summer'
        season.loc[(timestamp >= 1008) & (timestamp <= 1128)] = 'Autumn'
    else:
        # Winter wraps around the turn of the year, hence the "or"
        season.loc[(timestamp >= 1207) | (timestamp <= 330)] = 'Winter'
        season.loc[(timestamp >= 331) & (timestamp <= 530)] = 'Spring'
        season.loc[(timestamp >= 531) & (timestamp <= 922)] = 'Summer'
        season.loc[(timestamp >= 923) & (timestamp <= 1206)] = 'Autumn'
    return season

# Create annual daily values
def make_daily_annual(temp, groups=None, ref_year=1900):
    """Average all numeric columns per calendar day, across years.

    Rows are grouped by the month-day of 'DateTime' (and optionally by
    further columns) and the numeric columns are averaged. The result gets a
    'Day' column ('MM-DD') and a 'DateTime' column holding that day in a
    placeholder year.

    Parameters:
        temp: DataFrame with a datetime column 'DateTime'. It is not modified.
        groups: Optional list of additional column names to group by.
        ref_year: Placeholder year of the returned 'DateTime' column. The
            default 1900 is not a leap year, so data containing 29 February
            need a leap year here (e.g. 2000).

    Returns:
        DataFrame with one row per day (and group) and the column means.

    Raises:
        ValueError: If the data contain 29 February and ref_year is not a
            leap year.
    """
    # None instead of a mutable default argument
    group_columns = ['Day'] + (list(groups) if groups is not None else [])
    temp = temp.copy()
    temp['Day'] = temp['DateTime'].dt.strftime('%m-%d')
    # numeric_only: text columns (e.g. season labels) cannot be averaged and
    # raise a TypeError in pandas >= 2.0; 'DateTime' is rebuilt below anyway
    annual_df = temp.groupby(group_columns).mean(numeric_only=True)
    annual_df.reset_index(inplace=True)
    annual_df['DateTime'] = pd.to_datetime(f'{ref_year:04d}-' + annual_df['Day'],
                                           format='%Y-%m-%d')
    return annual_df

In [None]:
print('Loading data...')

# Ketura: truck data and the Yotvata meteo station
truck_ket_df = load_truck(truck_ket_fn)
meteo_ket_df = load_meteo_stn(meteo_ket_fn)

# Yatir desert: truck data
truck_yat_df = load_yatir_desert(truck_yat_fn)

# Yatir tower: load the full 20 years, then keep only 2013 and 2015
# (alternative selection used previously: all of 2018-2019)
yatir_full_df = load_tower(meteo_yat_fn)
tower_years = yatir_full_df['DateTime'].dt.year
meteo_yat_df = yatir_full_df.loc[tower_years.isin([2013, 2015])].copy()
del yatir_full_df, tower_years  # Clean up memory

print('Done...')

In [None]:
# Meteo background in Yatir forest (our tower data)
#--------------------------------------------------
#display(meteo_yat_df.columns.values)
# Map the tower column names onto the short names shared by all datasets
meteo_yat_df.rename(columns={
    'S_top_atm(CM21_IV)_Wm-2': 'Sin',
    'L_top_atm(PIR_IV)_Wm-2': 'Lin',
    'RH 15m Vaisala_%': 'RH',
    'Son_Hor_Wnd_Spd_m/s': 'u',
    'Son_Wnd_Dir_Deg': 'u_dir',
    'AirPress_Pa': 'Pa',
}, inplace=True)
# Tower Ta is the mean over these temperature sensors.
# NOTE(review): 'T_PIR_III_K' is averaged together with the '_C' columns;
# its name suggests Kelvin - confirm the unit of that column.
ta_sensors = ['Prof_Tc_13m_C', 'Prof_Tc_15m_C', 'T 15m Vaisala_C', 'T_PIR_III_K']
meteo_yat_df['Ta'] = meteo_yat_df[ta_sensors].mean(axis=1)
# Blank every sensor reading that is > 2°C away from this first mean ...
for sensor in ta_sensors:
    deviation = np.abs(meteo_yat_df['Ta'] - meteo_yat_df[sensor])
    meteo_yat_df.loc[deviation > 2, sensor] = np.nan
# ... and re-calculate the final mean tower Ta from what is left
meteo_yat_df['Ta'] = meteo_yat_df[ta_sensors].mean(axis=1)
meteo_yat_df['Ecosystem'] = 'Change (Forest)'

# Yatir desert data (truck)
#--------------------------
#display(truck_yat_df.columns.values)
# 'RH_average' is not renamed: there is already an RH column
truck_yat_df.rename(columns={
    'SW_IN': 'Sin',
    'LW_IN': 'Lin',
    'TA': 'Ta',
    'PA_merge': 'Pa',
    'WS_average': 'u',
    'WD_average': 'u_dir',
}, inplace=True)
truck_yat_df['Ecosystem'] = 'Background (Desert)'

# Convert from K to °C
truck_yat_df['Ta'] = truck_yat_df['Ta'] - 273.15

# Remove bad days, only nighttime (16 and 30 August 2015)
yat_time = truck_yat_df['DateTime']
on_bad_day = ((yat_time.dt.year == 2015) &
              (yat_time.dt.month == 8) &
              (yat_time.dt.day.isin([16, 30])))
truck_yat_df = truck_yat_df.loc[~on_bad_day]

# Merge
#------
yat_columns = ['Ecosystem', 'DateTime', 'Sin', 'Lin', 'Ta', 'RH', 'u', 'u_dir', 'Pa']
A = meteo_yat_df[yat_columns].copy().reset_index(drop=True)
B = truck_yat_df[yat_columns].copy().reset_index(drop=True)
A['Source'] = 'Meteo Station'
B['Source'] = 'Mobile Lab'
yat_df = pd.concat([A, B], axis=0, ignore_index=True)

# Meteo background in Ketura (Yotvata)
#-------------------------------------
#display(meteo_ket_df.columns.values)
meteo_ket_df.rename(columns={
    'Ta_C': 'Ta',
    'RH_perc': 'RH',
    'u_ms': 'u',
    'Eg_Wm2': 'Sin',
}, inplace=True)
meteo_ket_df['Ecosystem'] = 'Meteo Station'

# Ketura desert data (truck)
#----------------------------
#display(truck_ket_df.columns.values)
# The 'RH' column already carries its final name
truck_ket_df.rename(columns={
    'SW_IN_merge': 'Sin',
    'LW_IN_merge_corr': 'Lin',
    'Wind_speed': 'u',
    'Wind_direction': 'u_dir',
    'TA_merge': 'Ta',
}, inplace=True)

# Convert from K to °C
truck_ket_df['Ta'] = truck_ket_df['Ta'] - 273.15

# Merge
#------
# The meteo station provides fewer variables than the truck; the missing
# columns are filled with NaN by the concatenation
A = meteo_ket_df[['Ecosystem', 'DateTime', 'Sin', 'Ta', 'RH', 'u']].copy().reset_index(drop=True)
B = truck_ket_df[['Ecosystem', 'DateTime', 'Sin', 'Lin', 'Ta', 'RH', 'u', 'u_dir']].copy().reset_index(drop=True)
A['Source'] = 'Meteo Station'
B['Source'] = 'Mobile Lab'
ket_df = pd.concat([A, B], axis=0, ignore_index=True)

# Replace the short ecosystem labels with the descriptive ones
ecosystem_labels = {'Solar': 'Change (PV field)', 'Desert': 'Background (Desert)'}
for short_label, long_label in ecosystem_labels.items():
    ket_df.loc[ket_df['Ecosystem'] == short_label, 'Ecosystem'] = long_label

# Add season information
#-----------------------
# Same two season columns for Yatir and Ketura
for site_df in (yat_df, ket_df):
    site_df['season'] = add_season_info(site_df['DateTime'])
    site_df['high_season'] = add_season_info(site_df['DateTime'], season_type='high')

In [None]:
# Daily means per ecosystem and data source, tagged with the site name
site_groups = ['Ecosystem', 'Source']

ket_annual = make_daily_annual(ket_df, groups=site_groups)
ket_annual['Location'] = 'PV Field Area (Ketura)'

yat_annual = make_daily_annual(yat_df, groups=site_groups)
yat_annual['Location'] = 'Forest Area (Yatir)'

# Remove bad values (set them to NaN)
# - Ketura: daily means above 400 (half-day in Ketura PV field)
ket_annual['Sin'] = ket_annual['Sin'].mask(ket_annual['Sin'] > 400)
# - Yatir: daily means below 10 (outlier in Yatir forest data)
yat_annual['Sin'] = yat_annual['Sin'].mask(yat_annual['Sin'] < 10)

# Stack both sites into one dataframe (Yatir first)
annual_df = pd.concat([yat_annual, ket_annual], axis=0, ignore_index=True)

In [None]:
# Prepare
#--------
# Long format: one row per date, source, location and variable
id_columns = ['DateTime', 'Source', 'Location']
annual_long = annual_df[id_columns + ['Sin', 'Ta', 'RH', 'u']].melt(id_vars=id_columns)
# Replace the short variable names with the labels shown on the facets
facet_labels = {
    'Sin': '$S_{in}$ [$W~m^{-2}$]',
    'Ta': '$T_{a}$ [°C]',
    'RH': '$RH$ [%]',
    'u': '$u$ [$m~s^{-1}$]',
}
for short_name, facet_label in facet_labels.items():
    annual_long.loc[annual_long['variable'] == short_name, 'variable'] = facet_label

# Make figure
#--------------
# One panel per variable (rows) and location (columns), coloured by source;
# each row has its own y scale
plt = (
    ggplot(annual_long)
    + geom_point(aes(x='DateTime', y='value', colour = 'Source'), size=0.5)
    + theme_bw()
    + theme(axis_text_x=element_text(angle=45),
            axis_title_x=element_blank(), axis_title_y=element_blank())
    + labs(x='Month', y='$S_{in}$ [$W~m^{-2}$]', colour = 'Source')
    + scale_x_datetime(breaks=date_breaks('1 month'), labels=date_format('%b'))
    + scale_colour_manual(values=cbPalette)
    + scale_fill_manual(values=cbPalette)
    + facet_grid('variable ~ Location', scales='free_y')
)
print(plt)

# Save as bitmap and as vector graphic with identical dimensions
for figure_name in ('meteo_parameters.png', 'meteo_parameters.pdf'):
    plt.save(graphs_path + figure_name, width=1.6*15, height=1.6*7.5, units='cm', dpi=600)