# Calculation of annual PV field albedo

Due to height restrictions imposed by the field operator (the permitted measurement height was sufficient for flux data), PV field albedo was obtained by combining the albedo of the field's components with their fractions, as viewed from nadir. This script has 2 parts:
1. Determination of the fractions
2. Calculation of albedo

## Model of PV field fractions across the year (cosine curves fits)

1. Drone imagery taken over the PV field throughout the day during the summer and autumn campaigns was used to measure the fraction of PV panels as well as exposed and shaded soil
2. In order to obtain the shaded soil fraction for each half-hour of the year:
   1. A cosine curve was fitted to each campaign’s shade fraction as a function of time
   2. An additional set of cosine curves was fitted to the yearly course as a function of day-of-year
3. The PV panel fraction was a fixed 51%, and the remaining fraction was exposed soil

Important information (based on https://www.suncalc.org/#/29.7692,34.9695,8/2023.03.21/12:21/1/3): 
- Summer solstice, highest solar elevation: 21 June 12:41 PM (local time = UTC + 3), i.e. 11:41 Israel winter time
- Winter solstice, lowest solar elevation: 22 Dec 11:38 AM (local time = UTC + 2), i.e. 11:38 Israel winter time
- The 11:30-12:00 measurement period will be used as the maximum solar elevation throughout the year, or more precisely 11:45

In [None]:
import pandas as pd
import numpy as np
import glob
from scipy.optimize import curve_fit
import warnings

from plotnine import *
from mizani.breaks import date_breaks
from mizani.formatters import date_format

In [None]:
# Data location
# All paths are built by plain string concatenation relative to the notebook directory
project_path = './'
data_path = project_path + '../data/'

# Input data
# data_fn:              campaign dataset (used below for desert albedo and PV field SWin)
# albedo_fn:            afforestation site data (used below for the diffuse fraction)
# sw_fn:                meteo station data (used below for annual SWin weights)
# percentage_manual_fn: manually detected panel/shadow/soil percentages
data_fn   = data_path + 'dataset.csv'
albedo_fn = data_path + 'afforestation_albedo.csv'
sw_fn     = data_path + 'met_data_pv.csv'
percentage_manual_fn = data_path + 'PV_percentages_manual.csv'

# Output path
output_path = project_path + '../data/'
out_pv_albedo_seasonal_fn = output_path + 'PV_albedo_campaigns_modelled.csv'
out_pv_albedo_annual_fn   = output_path + 'PV_albedo_annual_modelled.csv'

# Constants
# Fixed albedo assigned to the PV panel fraction
albedo_pv   = 0.05
# NOTE(review): not referenced anywhere in the visible code; presumably the panel tilt in degrees - confirm
panel_angle = 30

# Fixed PV panel fraction of the field, in percent
pv_percent = 51

In [None]:
def load_data(fn, index_col=False, silent=False):
    """Read a CSV file and parse its 'DateTime' column as UTC timestamps.

    Parameters
    ----------
    fn : str
        Path of the CSV file. Only the part after the last '/' is printed.
    index_col : optional
        Passed straight through to ``pd.read_csv``.
    silent : bool
        When True, the progress message is not printed.

    Returns
    -------
    pandas.DataFrame
        The file contents with 'DateTime' converted to timezone-aware (UTC)
        timestamps, parsed with the format '%Y-%m-%d %H:%M:%S'.
    """
    if not silent:
        basename = fn.split('/')[-1]
        print('  - Loading', basename)

    frame = pd.read_csv(fn, index_col=index_col)
    frame['DateTime'] = pd.to_datetime(
        frame['DateTime'],
        format='%Y-%m-%d %H:%M:%S',
        utc=True,
    )
    return frame

def load_percentage_manual(fn, silent=False):
    """Load the manually detected cover percentages and normalise them.

    The file is read with ',' as decimal separator. A timestamp is extracted
    from characters 4-16 of the 'filename' column (format 'yymmdd_HHMMSS'),
    shifted by +2 h, rounded to 15 min, and rows commented as
    'full shadow, clouds' are discarded. Rows sharing the same rounded
    timestamp are averaged. Finally the panel fraction is fixed to the
    module-level ``pv_percent`` and shadow/soil are rescaled so that all three
    sum to 100 %.

    Parameters
    ----------
    fn : str
        Path of the CSV file.
    silent : bool
        When True, the progress message is not printed.

    Returns
    -------
    pandas.DataFrame
        One row per rounded timestamp with the columns 'DateTime', 'panel',
        'shadow' and 'soil' (percentages).
    """
    if not silent:
        print('  -', fn.split('/')[-1])
    temp = pd.read_csv(fn, decimal=',')

    # Organise timestamps (taken from the image file name)
    temp['DateTime'] = temp['filename'].str.slice(4, 17)
    temp['DateTime'] = pd.to_datetime(temp['DateTime'], format='%y%m%d_%H%M%S')
    # Adjust to Israel winter time
    # NOTE(review): the +2 h shift presumes the file-name timestamps are in UTC - confirm
    temp['DateTime'] = temp['DateTime'] + pd.Timedelta(hours=2)
    # Round to 15min
    temp['DateTime'] = temp['DateTime'].dt.round('15min')

    # Exclude bad data
    temp = temp.loc[temp['comments'] != 'full shadow, clouds'].copy()

    # Move timestamp to beginning
    temp.insert(0, 'DateTime', temp.pop('DateTime'))
    # Drop file name and other unnecessary columns
    temp.drop(['filename', 'orientation', 'panels_px', 'shadow_px', 'soil_px', 'total_px'],
              axis=1, inplace=True)

    # Average by time. The text column 'comments' is still present here, so
    # restrict the mean to numeric columns: recent pandas versions raise on
    # non-numeric columns instead of silently dropping them.
    temp = temp.groupby('DateTime').mean(numeric_only=True)
    temp.reset_index(inplace=True)

    # Rename column
    temp.rename({'panels': 'panel'}, axis=1, inplace=True)

    # Normalise % with panels fixed to pv_percent: the remaining
    # (100 - pv_percent) % is split between shadow and soil according to their
    # measured ratio. The denominator is computed once, before either column
    # is overwritten.
    not_panel = temp['shadow'] + temp['soil']
    remaining = 100 - pv_percent
    temp['panel'] = pv_percent
    temp['shadow'] = temp['shadow'] * remaining / not_panel
    temp['soil'] = temp['soil'] * remaining / not_panel
    return temp

def add_ecosystem(df):
    """Label each row with its campaign ecosystem, season and time of day.

    A copy of ``df`` is returned with three added columns and the column order
    'DateTime', 'Season', 'Ecosystem', 'time', followed by the remaining
    columns:

    - 'Ecosystem': 'Desert background' or 'PV field', from fixed date windows
    - 'Season': 'Spring', 'Autumn' or 'Summer', from fixed date windows
    - 'time': time of day as an 'HH:MM' string

    Rows outside every window keep NaN in 'Ecosystem' and 'Season'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'DateTime'. It is not modified.

    Returns
    -------
    pandas.DataFrame
    """
    df = df.copy()

    # (start, end, label), both bounds exclusive. The windows are applied in
    # order, and the desert/PV windows of a campaign overlap, so the later
    # entry wins for timestamps that fall inside both.
    ecosystem_windows = [
        ('2018-03-17', '2018-03-22', 'Desert background'),
        ('2018-03-21', '2018-03-28', 'PV field'),
        ('2018-10-15', '2018-10-25', 'Desert background'),
        ('2018-10-23', '2018-11-01', 'PV field'),
        ('2019-07-08', '2019-07-17', 'Desert background'),
        ('2019-07-15', '2019-07-25', 'PV field'),
    ]
    season_windows = [
        ('2018-03-17', '2018-03-28', 'Spring'),
        ('2018-10-15', '2018-11-01', 'Autumn'),
        ('2019-07-08', '2019-07-25', 'Summer'),
    ]

    for column, windows in (('Ecosystem', ecosystem_windows), ('Season', season_windows)):
        # Create the label column as object dtype: filling a float NaN column
        # with strings is a deprecated incompatible-dtype assignment in pandas.
        df[column] = pd.Series(np.nan, index=df.index, dtype=object)
        for start, end, label in windows:
            in_window = (df['DateTime'] > start) & (df['DateTime'] < end)
            df.loc[in_window, column] = label

    # Create time-of-day identifier
    df['time'] = df['DateTime'].dt.strftime('%H:%M')

    # Move the identifier columns to the front; each insert goes to position 0,
    # so the final order is the reverse of this tuple.
    for name in ('time', 'Ecosystem', 'Season', 'DateTime'):
        df.insert(0, name, df.pop(name))
    return df

In [None]:
print('Loading data...')

# Manually detected percentages
# (averaged per 15-min timestamp and normalised to the fixed panel fraction)
perc_df = load_percentage_manual(percentage_manual_fn)
# Attach the campaign labels ('Ecosystem', 'Season') and the 'HH:MM' time of day
perc_df = add_ecosystem(perc_df)

print('Done...')

### Preparations

- Calculate the day-of-year of the middle of the campaign
- Create diurnals for each campaign
- Fit cosine curve to each campaign

In [None]:
# Diagnostic plot of the raw (unsmoothed) shadow percentage against time of day

graph_df = perc_df.copy()
# Convert the 'HH:MM' strings to decimal hours for the x axis
graph_df['time'] = pd.to_datetime(graph_df['time'], format='%H:%M')
graph_df['time'] = graph_df['time'].dt.hour + graph_df['time'].dt.minute / 60

plt = (
    ggplot(graph_df)
    + geom_point(aes(x='time', y='shadow', colour='Season'))
    + labs(x='Time', y='% shadow')
    + theme_bw()
)
print(plt)

In [None]:
# Middle of campaign
#--------------------

# Create DOY
def create_middle_doy(temp, timestamp_col, group_cols):
    """Return the day-of-year halfway between the first and last day of each group.

    Parameters
    ----------
    temp : pandas.DataFrame
        Input data; it is not modified.
    timestamp_col : str
        Name of the datetime column the day-of-year is taken from.
    group_cols : list of str
        Columns that identify a group (e.g. a campaign).

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns of ``group_cols`` plus 'doy', the
        rounded mean of the smallest and largest day-of-year in that group.
    """
    frame = temp.copy()
    frame['doy'] = frame[timestamp_col].dt.dayofyear.astype(float)

    # First and last day-of-year of every group
    span = (
        frame[group_cols + ['doy']]
        .groupby(group_cols)['doy']
        .agg(doy_min='min', doy_max='max')
        .reset_index()
    )

    # Midpoint, rounded to a whole day
    span['doy'] = np.round((span['doy_min'] + span['doy_max']) / 2, 0)
    return span.drop(columns=['doy_min', 'doy_max'])

def create_15min_diurnal_df(temp, timestamp_col, group_cols):
    """Smooth the fractions per group along the time of day and add the mid-campaign DOY.

    Parameters
    ----------
    temp : pandas.DataFrame
        Must contain the columns 'time' ('HH:MM' strings) and 'DateTime' (both
        names are hard-coded below) as well as the columns in ``group_cols``.
        It is not modified.
    timestamp_col : str
        Datetime column handed to ``create_middle_doy``.
    group_cols : list of str
        Columns that identify a group.

    Returns
    -------
    pandas.DataFrame
        Rolling means of the remaining columns per group, with 'time' as first
        column and the representative 'doy' of each group merged in.
    """
    temp = temp.copy()
    # Collapse percentages to diurnals
    # ('HH:MM' strings are parsed to timestamps that only differ in time of day)
    temp['DateTime2'] = pd.to_datetime(temp['time'], format='%H:%M')

    # set the DateTime column as the index
    temp.set_index('DateTime2', inplace=True)
    # Remove DateTime, otherwise it crashes
    temp2 = temp.drop(['DateTime'], axis=1).copy()
    # Sort by time of day, then smooth within each group with a rolling mean over
    # 6 consecutive rows (at least 2 values required). The window counts rows,
    # not minutes, so no resampling onto a regular 15-minute grid takes place.
    # NOTE(review): the string column 'time' is still part of temp2 when .mean() is
    # called; whether it is dropped or raises depends on the pandas version - verify
    out_df = temp2.sort_values(by=['time']).groupby(group_cols).rolling(window=6, min_periods=2).mean()
    # The group columns and 'DateTime2' come back from the index as regular columns
    out_df.reset_index(inplace=True)
    out_df.rename({'DateTime2': 'DateTime'}, axis=1, inplace=True)
    # Rebuild the 'HH:MM' string and move it to the first position
    out_df['time'] = out_df['DateTime'].dt.strftime('%H:%M')
    col = out_df.pop('time')
    out_df.insert(0, col.name, col, allow_duplicates=True)
    out_df.drop(['DateTime'], axis=1, inplace=True)
    
    # Add repr DOY for this
    doy_df = create_middle_doy(temp, timestamp_col, group_cols)
    out_df = out_df.merge(doy_df, on=group_cols, how='left')
    return(out_df)

# Smoothed diurnal course per (Ecosystem, Season), including the mid-campaign DOY
df_resampled = create_15min_diurnal_df(perc_df, 'DateTime', ['Ecosystem', 'Season'])
display(df_resampled)

In [None]:
# Diagnostic plot of the smoothed shadow percentage against time of day

graph_df = df_resampled.copy()
# Convert the 'HH:MM' strings to decimal hours for the x axis
graph_df['time'] = pd.to_datetime(graph_df['time'], format='%H:%M')
graph_df['time'] = graph_df['time'].dt.hour + graph_df['time'].dt.minute / 60

plt = (
    ggplot(graph_df)
    + geom_point(aes(x='time', y='shadow', colour='Season'))
    + labs(x='Time', y='% shadow')
    + theme_bw()
)
print(plt)

In [None]:
def convert_time_to_decimal(time_data):
    """Convert a Series of 'HH:MM' strings to decimal hours (e.g. '09:15' -> 9.25)."""
    parsed = pd.to_datetime(time_data, format='%H:%M')
    return parsed.dt.hour + parsed.dt.minute / 60

# fit cosine curve
def fit_daily_cos_curve(temp):
    """Fit a cosine curve to the shadow percentage as a function of time of day.

    Parameters
    ----------
    temp : pandas.DataFrame
        Must contain 'time' ('HH:MM' strings) and 'shadow'. Rows with missing
        'shadow' are ignored. The input is not modified.

    Returns
    -------
    numpy.ndarray
        The four fitted parameters in the order (a, b, c, d) of the model
        ``a * cos(radians(hour + b)) + d``.
    """
    def shadow_cosine(hour, a, b, c, d):
        # NOTE(review): 'c' is part of the signature but does not appear in the
        # formula, so the fit cannot constrain it. The hour is also passed to
        # np.radians directly, i.e. one hour is treated as one degree - confirm
        # both are intended.
        return a * np.cos(np.radians(hour + b)) + d

    obs = temp.copy()
    # Time of day as decimal hours
    obs['time'] = convert_time_to_decimal(obs['time'])
    # Keep only rows that actually have a shadow value
    obs = obs[obs['shadow'].notna()]

    # All warnings raised while fitting are suppressed
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fitted, _covariance = curve_fit(shadow_cosine, obs['time'], obs['shadow'])

    return fitted

def shadow_daily(hour, a, b, c, d):
    """Evaluate the daily shadow model ``a * cos(radians(hour + b)) + d``.

    ``c`` is accepted so that the signature matches the four fitted
    parameters, but it is not used in the formula.
    """
    angle = np.radians(hour + b)
    return a * np.cos(angle) + d

# Fit one daily curve per season
#-------------------------------
parameters = []
for current_season in df_resampled['Season'].unique():
    season_rows = df_resampled.loc[df_resampled['Season'] == current_season]
    # Representative day-of-year of this campaign
    current_doy = season_rows['doy'].unique()[0]
    # Fit the curve and store it together with its identifiers
    param = [current_season, current_doy] + list(fit_daily_cos_curve(season_rows))
    parameters.append(param)

param_df = pd.DataFrame(parameters, columns=['Season', 'doy', 'a', 'b', 'c', 'd'])
display(param_df)

In [None]:
# Diagnostic plots: fitted daily curve against the smoothed data, Summer then Autumn
#------------------------------------------------------------------------------------
for season_name in ('Summer', 'Autumn'):
    params = param_df.loc[param_df['Season'] == season_name]
    in_season = df_resampled['Season'] == season_name

    # Raw data to be shown in the plot
    x_data = convert_time_to_decimal(df_resampled.loc[in_season, 'time'])
    y_data = df_resampled.loc[in_season, 'shadow']

    # Model data, evaluated every half hour; non-positive values are blanked out
    x_data_model = np.arange(0, 24, 0.5)
    coeffs = [params[key].values[0] for key in ('a', 'b', 'c', 'd')]
    y_data_model = shadow_daily(x_data_model, *coeffs)
    y_data_model = np.where(y_data_model > 0, y_data_model, np.nan)

    # Plot
    plt = (
        ggplot()
        + geom_line(aes(x=x_data_model, y=y_data_model))
        + geom_point(aes(x=x_data, y=y_data), colour='blue')
        + labs(x='Time', y='% shadow')
        + ggtitle(season_name)
        + theme_bw()
    )
    print(plt)

In [None]:
# Evaluate the daily fits on a regular 15-min grid, as input for the seasonal fitting
#-------------------------------------------------------------------------------------
from itertools import product

# One row per (season, time of day)
seasons = ['Summer', 'Autumn']
times = np.arange(0, 24, 0.25)
cols = list(product(seasons, times))
t_df = pd.DataFrame(cols, columns=['Season', 'time'])

# Placeholders, filled season by season below
t_df['doy'] = np.nan
t_df['shadow'] = np.nan
shadow_perc = []
for current_season in t_df['Season'].unique():
    rows = t_df['Season'] == current_season
    current_doy = df_resampled.loc[df_resampled['Season'] == current_season, 'doy'].unique()[0]
    params = param_df.loc[param_df['Season'] == current_season]
    # Evaluate the fitted daily curve of this season at every grid time
    shadow_perc = list(shadow_daily(
        t_df.loc[rows, 'time'],
        params['a'].values[0],
        params['b'].values[0],
        params['c'].values[0],
        params['d'].values[0],
    ))
    t_df.loc[rows, 'doy'] = current_doy
    t_df.loc[rows, 'shadow'] = shadow_perc

display(t_df)

In [None]:
# Fit annual cosine curve
#-------------------------
def fit_annual_cos_curve(temp):
    """Fit the annual course of the shadow percentage for one time of day.

    The model is ``a * cos(radians((doy - 365/2 + offset) * 360/364)) + c``,
    where ``a`` is the amplitude and ``c`` the vertical offset. The phase
    ``offset`` is not fitted: it is the mean distance of the summer solstice
    (21 June) from mid-year and of the winter solstice (22 December) from the
    end of the year, both taken from 2019.

    Parameters
    ----------
    temp : pandas.DataFrame
        Must contain the columns 'doy' and 'shadow'.

    Returns
    -------
    list
        ``[a, c, offset]``.
    """
    # The highest sun elevation (smallest shadow) is at the summer solstice,
    # the lowest (largest shadow) at the winter solstice
    doy_summer_solstice = float(pd.Timestamp('2019-06-21').dayofyear)
    doy_winter_solstice = float(pd.Timestamp('2019-12-22').dayofyear)

    # Offset of the solstices from the middle and the end of the year
    offset = np.mean([365/2 - doy_summer_solstice, 365 - doy_winter_solstice])

    def shadow_perc_from_doy(doy, a, c):
        # NOTE(review): the period is scaled with 360/364 while the rest of the
        # formula uses 365 days - confirm this is intended
        phase_deg = (doy - 365/2 + offset) * 360/364
        return a * np.cos(np.radians(phase_deg)) + c

    # All warnings raised while fitting are suppressed
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fitted, _covariance = curve_fit(
            shadow_perc_from_doy,
            list(temp['doy']),
            list(temp['shadow']),
        )

    return [*fitted, offset]


# For every 15-min time of day, fit the shade percentage as a function of doy
# and collect the fit parameters in one dataframe
doy_list = []
for current_time in t_df['time'].unique():
    # Fit the curve through the seasonal values of this time of day
    params = fit_annual_cos_curve(t_df.loc[t_df['time'] == current_time])
    doy_list.append([current_time, *params])

doy_df = pd.DataFrame(doy_list, columns=['time', 'a', 'c', 'offset'])

In [None]:
# Annual shade model in its applied form (same formula as used for fitting)
def shadow_perc_from_doy(doy, a, c, pv_percent, offset=9.75):
    """Evaluate the annual shadow model and limit it to the physically possible range.

    Computes ``a * cos(radians((doy - 365/2 + offset) * 360/364)) + c`` and
    then limits the result: values below 0 become 0, and values above
    ``100 - pv_percent`` (everything that is not panel) become
    ``100 - pv_percent``.

    Returns
    -------
    numpy.ndarray
        Shadow percentage for every entry of ``doy``.
    """
    upper_limit = 100 - pv_percent
    phase_deg = (doy - 365/2 + offset) * 360/364
    modelled = a * np.cos(np.radians(phase_deg)) + c
    # No negative shade ...
    floored = np.where(modelled < 0, 0, modelled)
    # ... and not more shade than non-panel area
    return np.where(floored > upper_limit, upper_limit, floored)

# Model the shadow % for every day of the year and every 15-min time of day
#---------------------------------------------------------------------------
from itertools import product

# One row per (doy, time)
doy  = np.arange(1, 366)
time = t_df['time'].unique()
cols = list(product(doy, time))
percentage_df = pd.DataFrame(cols, columns=['doy', 'time'])

# Attach the annual fit parameters of each time of day and evaluate the model.
# The model is called with its default offset; the fitted 'offset' column is not passed on.
percentage_df = percentage_df.merge(doy_df, on='time', how='left')
percentage_df['shadow'] = shadow_perc_from_doy(
    percentage_df['doy'],
    percentage_df['a'],
    percentage_df['c'],
    pv_percent,
)
# The fit parameters are no longer needed
percentage_df = percentage_df.drop(columns=['a', 'c', 'offset'])

# Panels are a fixed fraction, exposed soil is whatever remains
#--------------------------------------------------------------
percentage_df['panel'] = pv_percent
percentage_df['soil'] = 100 - percentage_df['panel'] - percentage_df['shadow']

# Check: annual course of the shadow % at 09:00
plt = (
    ggplot(percentage_df.loc[percentage_df['time'] == 9.00])
    + geom_line(aes(x='doy', y='shadow'))
    + labs(x='Day of year', y='% shadow')
    + theme_bw()
)
print(plt)

In [None]:
# Save data
def save_percentage_data(fn, temp):
    """Write the modelled fractions to ``fn`` as CSV (without the index).

    The decimal-hour 'time' column is converted to 'HH:MM' strings in the
    written file; ``temp`` itself is not modified.

    Parameters
    ----------
    fn : str
        Path of the CSV file to write.
    temp : pandas.DataFrame
        Must contain a 'time' column holding decimal hours.
    """
    temp = temp.copy()
    # Convert times back to normal format (e.g. 13.5 -> '13:30')
    temp['time'] = temp['time'].apply(lambda x: '{:02.0f}:{:02.0f}'.format(*divmod(x * 60, 60)))
    # Write to the requested file; previously a hard-coded path was used and
    # the 'fn' argument was ignored
    temp.to_csv(fn, index=False)

# Store the modelled panel/shadow/soil fractions of the whole year
save_percentage_data(output_path + 'PV_percentages_modelled.csv', percentage_df)

## Annual albedo calculation

Base albedo ($\alpha$) values:
- PV panels: 0.05 (fixed)
- Exposed soil: Extracted as the equivalent of PV desert background albedo
- Shaded soil: Exposed soil $\alpha$, multiplied by diffuse radiation fraction (Methods S5), obtained from above-canopy measurements at the nearby afforestation research station

Calculation method:
1. Extract the soil albedo from the desert, as diurnals
2. Obtain the diffuse fraction from the nearby afforestation research site data
3. Seasonal and annual $\alpha$: Weighted means by $S_{in}$ across the seasonal campaigns or the entire year, respectively


In [None]:
def fix_SWR(temp):
    """Return a copy of ``temp`` with implausible shortwave values set to 0.

    - 'SWin' is zeroed where it is below 1, or where 'PARin' or 'PARout' is
      not positive.
    - 'SWout' is zeroed where it is below 1, and wherever 'SWin' is zeroed.

    Missing values never satisfy any of these conditions and stay untouched.
    """
    fixed = temp.copy()

    sw_in_low = fixed['SWin'] < 1
    sw_out_low = fixed['SWout'] < 1
    no_par = (fixed['PARin'] <= 0) | (fixed['PARout'] <= 0)

    zero_in = sw_in_low | no_par
    fixed.loc[zero_in, 'SWin'] = 0
    fixed.loc[sw_out_low | zero_in, 'SWout'] = 0
    return fixed

def get_diurnal_soil_albedo(temp):
    """Return the mean desert-background albedo per time of day and season.

    The shortwave columns are cleaned with ``fix_SWR``, albedo is recomputed
    as 'SWout' / 'SWin', and the rows labelled 'PV desert background' are
    averaged by ('time', 'Season'). The result is used for the soil and shade
    fractions of the PV field.

    Parameters
    ----------
    temp : pandas.DataFrame
        Needs 'DateTime', 'Ecosystem', 'Season' and the columns used by
        ``fix_SWR``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'time' ('HH:MM'), 'Season' and 'albedo_soil'.
    """
    cleaned = fix_SWR(temp)
    cleaned['time'] = cleaned['DateTime'].dt.strftime('%H:%M')

    # Re-calculate albedo from SW measurements (relevant for the desert)
    cleaned['albedo'] = cleaned['SWout'] / cleaned['SWin']

    # Keep the desert background only and collapse it to diurnals
    is_desert = cleaned['Ecosystem'] == 'PV desert background'
    desert_albedo = (
        cleaned.loc[is_desert, ['time', 'Season', 'albedo']]
        .rename(columns={'albedo': 'albedo_soil'})
        .groupby(['time', 'Season'])
        .mean()
        .reset_index()
    )
    return desert_albedo

def get_diffuse_data(temp):
    """Return the diffuse fraction ('SWdif' / 'SWin') for the years 2018 and 2019.

    ``temp`` is the afforestation dataset; the diffuse fraction measured
    there (Yatir) is assumed to be the same at the PV site (Ketura).

    Returns
    -------
    pandas.DataFrame
        Columns 'DateTime' and 'f_dif'.
    """
    in_period = temp['DateTime'].dt.year.isin([2018, 2019])
    subset = temp.loc[in_period].copy()
    subset['f_dif'] = subset['SWdif'] / subset['SWin']
    return subset[['DateTime', 'f_dif']]

def interpolate_val(temp):
    """Smooth 'f_dif' with a trailing rolling mean, separately for each time of day.

    For every 'HH:MM' slot, the values of that slot are replaced by the mean
    over the current and up to 13 preceding rows of the same slot (a single
    valid value is enough). Existing values are therefore smoothed as well,
    and gaps are filled as long as a valid value lies within the window.

    Parameters
    ----------
    temp : pandas.DataFrame
        Needs 'DateTime' and 'f_dif'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Same layout as the input, with smoothed 'f_dif'.
    """
    smoothed = temp.copy()
    # Helper column identifying the time of day; removed again before returning
    smoothed['time'] = smoothed['DateTime'].dt.strftime('%H:%M')

    for slot in smoothed['time'].unique():
        same_slot = smoothed['time'] == slot
        smoothed.loc[same_slot, 'f_dif'] = (
            smoothed.loc[same_slot, 'f_dif'].rolling(window=14, min_periods=1).mean()
        )

    return smoothed.drop(columns=['time'])

In [None]:
# Seasonal (campaign) PV field albedo:
# albedo = albedo_soil * soil + albedo_shade * shadow + albedo_pv * panel,
# evaluated for every PV field measurement period
print('Seasonal albedo calculation:')

# All data
df = load_data(data_fn)
# Shift the campaign timestamps by 15 min
# NOTE(review): presumably moves them to the middle of the half-hour period - confirm
df['DateTime'] = df['DateTime'] + pd.Timedelta('15min')
albedo_df = load_data(albedo_fn) # Dataset also contains shortwave & diffuse radiation

print('  - Extracting PV desert background albedo')
# Obtain desert soil albedo
pv_desert_albedo = get_diurnal_soil_albedo(df)

print('  - Extracting diffuse fraction from nearby afforestation site')
# Get diffuse radiation from afforestation site
dif_df = get_diffuse_data(albedo_df)
# Make rolling means interpolating the diffuse radiation
dif_df = interpolate_val(dif_df)

print('  - Adding time & DOY variables')
# Add day-of-year
df['doy']  = df['DateTime'].dt.strftime('%j').astype(int)
df['time'] = df['DateTime'].dt.strftime('%H:%M')

print('  - Extracting relevant data')
# Extract measurement times in the PV field
pv_albedo = df.loc[df['Ecosystem'] == 'PV field', ['DateTime','doy','time','Season','SWin']].copy()
# Negative incoming radiation is set to 0 (SWin is only used as weight later on)
pv_albedo.loc[pv_albedo['SWin'] < 0, 'SWin'] = 0

print('  - Merging with PV field fractions, diffuse fraction & PV desert background albedo')
# Add PV field fractions
percentages = percentage_df.copy()
# Decimal hours -> 'HH:MM', so that the merge keys have the same format
percentages['time'] = percentages['time'].apply(lambda x: '{:02.0f}:{:02.0f}'.format(*divmod(x * 60, 60)))
# Percent -> fraction
percentages['shadow'] = percentages['shadow']*0.01
percentages['panel']  = percentages['panel']*0.01
percentages['soil']   = percentages['soil']*0.01
pv_albedo = pv_albedo.merge(percentages, on=['doy','time'], how='left')

# Add diffuse fraction
# NOTE(review): df was shifted by 15 min above, albedo_df was not; this exact-timestamp
# merge only finds matches if both files end up on the same time grid - verify
pv_albedo = pv_albedo.merge(dif_df, on=['DateTime'], how='left')

# Add soil albedo
pv_albedo = pv_albedo.merge(pv_desert_albedo, on=['Season','time'], how='left')

print('  - Calculating shade albedo (according to Methods S5)')
# Calculate shade albedo
pv_albedo['albedo_shade'] = pv_albedo['albedo_soil'] * pv_albedo['f_dif']

print('  - Calculating PV field albedo based on fractions')
# Calculate PV field albedo based on percentages
pv_albedo['albedo'] = pv_albedo['albedo_soil'] * pv_albedo['soil'] \
                    + pv_albedo['albedo_shade'] * pv_albedo['shadow'] \
                    + albedo_pv * pv_albedo['panel']

print('  - Saving data to', out_pv_albedo_seasonal_fn)
# Save data
pv_albedo.to_csv(out_pv_albedo_seasonal_fn, index=False)

print('Done...')

# Show some example data
display(pv_albedo.loc[pv_albedo['time'] == '12:15'])

In [None]:
def weighted_avg_and_std(values, weights):
    """Return the weighted mean, weighted standard deviation, maximum and minimum.

    Invalid values (NaN/inf) are ignored. Non-finite weights are treated as a
    weight of 0, so that a single missing weight no longer turns the mean and
    the standard deviation into NaN.

    Parameters
    ----------
    values : array-like
        Data to summarise.
    weights : array-like
        One weight per value.

    Returns
    -------
    tuple
        ``(average, std, maximum, minimum)``; maximum and minimum are taken
        over all valid values, regardless of their weight.
    """
    values = np.ma.masked_invalid(values)
    # A value without a usable weight must not contribute to the average
    weights = np.asarray(weights, dtype=float)
    weights = np.where(np.isfinite(weights), weights, 0.0)

    average = np.ma.average(values, weights=weights)
    # Fast and numerically precise:
    variance = np.ma.average((values - average)**2, weights=weights)

    maximum = np.max(values)
    minimum = np.min(values)
    return (average, np.sqrt(variance), maximum, minimum)

# SWin-weighted albedo statistics (mean, std, max, min) of each campaign
spring = pv_albedo[pv_albedo['Season'] == 'Spring']
summer = pv_albedo[pv_albedo['Season'] == 'Summer']
autumn = pv_albedo[pv_albedo['Season'] == 'Autumn']

print('Spring:')
print(weighted_avg_and_std(spring['albedo'], spring['SWin']))
print()

print('Summer:')
print(weighted_avg_and_std(summer['albedo'], summer['SWin']))
print()

print('Autumn:')
print(weighted_avg_and_std(autumn['albedo'], autumn['SWin']))

In [None]:
# Diagnostic plot of the modelled shadow fraction used for the campaign periods.
# The x axis is the time of day written as the integer HHMM (e.g. 1215).
graph_df = pv_albedo.copy()
graph_df['time'] = graph_df['DateTime'].dt.strftime('%H%M').astype(int)

plt = (
    ggplot(graph_df)
    + geom_point(aes(x='time', y='shadow', colour='Season'))
    + labs(x='Time', y='% shadow')
    + theme_bw()
)
print(plt)

## Annual Albedo

We assume that the desert soil albedo is constant across the year, using data of all the campaigns

In [None]:
def get_diurnal_soil_albedo_annual(temp):
    """Return the mean desert-background albedo and SWin per time of day.

    Same procedure as the seasonal variant, but all campaigns are pooled: the
    shortwave columns are cleaned with ``fix_SWR``, albedo is recomputed as
    'SWout' / 'SWin', and the rows labelled 'PV desert background' are
    averaged by 'time' only.

    Parameters
    ----------
    temp : pandas.DataFrame
        Needs 'DateTime', 'Ecosystem' and the columns used by ``fix_SWR``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'time' ('HH:MM'), 'albedo_soil' and 'SWin'.
    """
    cleaned = fix_SWR(temp)
    cleaned['time'] = cleaned['DateTime'].dt.strftime('%H:%M')

    # Re-calculate albedo from SW measurements (relevant for the desert)
    cleaned['albedo'] = cleaned['SWout'] / cleaned['SWin']

    # Keep the desert background only and collapse it to one diurnal course
    is_desert = cleaned['Ecosystem'] == 'PV desert background'
    desert_albedo = (
        cleaned.loc[is_desert, ['time', 'albedo', 'SWin']]
        .rename(columns={'albedo': 'albedo_soil'})
        .groupby(['time'])
        .mean()
        .reset_index()
    )
    return desert_albedo

def get_diffuse_data_annual(temp):
    """Return the mean diffuse fraction per day-of-year and time of day.

    Uses the 2018 and 2019 rows of the afforestation dataset; the diffuse
    fraction 'SWdif' / 'SWin' measured there (Yatir) is assumed to be the
    same at the PV site (Ketura).

    Returns
    -------
    pandas.DataFrame
        Columns 'doy' (int), 'time' ('HH:MM') and 'f_dif'.
    """
    in_period = temp['DateTime'].dt.year.isin([2018, 2019])
    subset = temp.loc[in_period].copy()

    subset['f_dif'] = subset['SWdif'] / subset['SWin']

    # Grouping keys
    subset['doy'] = subset['DateTime'].dt.dayofyear.astype(int)
    subset['time'] = subset['DateTime'].dt.strftime('%H:%M')

    # Average the years for each (doy, time)
    return subset.groupby(['doy', 'time'])['f_dif'].mean().reset_index()

def annual_SWin(temp, SWin_col):
    """Return the mean incoming shortwave radiation per day-of-year and time of day.

    Parameters
    ----------
    temp : pandas.DataFrame
        Needs 'DateTime' and the radiation column. It is not modified.
    SWin_col : str
        Name of the incoming shortwave column; it is called 'SWin' in the
        result.

    Returns
    -------
    pandas.DataFrame
        Columns 'doy' (int), 'time' ('HH:MM') and 'SWin'. Every missing
        radiation value is counted as 0 before averaging.
    """
    frame = temp.rename(columns={SWin_col: 'SWin'})

    # Missing radiation is treated as 0 (meant for the gaps at night)
    frame['SWin'] = frame['SWin'].fillna(0)

    # Grouping keys
    frame['doy'] = frame['DateTime'].dt.dayofyear.astype(int)
    frame['time'] = frame['DateTime'].dt.strftime('%H:%M')

    # Average all years for each (doy, time)
    return frame.groupby(['doy', 'time'])['SWin'].mean().reset_index()

def weighted_avg_and_std(values, weights):
    """Return the weighted mean, weighted standard deviation, maximum and minimum.

    Invalid values (NaN/inf) are ignored. Non-finite weights are treated as a
    weight of 0, so that a single missing weight no longer turns the mean and
    the standard deviation into NaN.

    Parameters
    ----------
    values : array-like
        Data to summarise.
    weights : array-like
        One weight per value.

    Returns
    -------
    tuple
        ``(average, std, maximum, minimum)``; maximum and minimum are taken
        over all valid values, regardless of their weight.
    """
    values = np.ma.masked_invalid(values)
    # A value without a usable weight must not contribute to the average
    weights = np.asarray(weights, dtype=float)
    weights = np.where(np.isfinite(weights), weights, 0.0)

    average = np.ma.average(values, weights=weights)
    # Fast and numerically precise:
    variance = np.ma.average((values - average)**2, weights=weights)

    maximum = np.max(values)
    minimum = np.min(values)
    return (average, np.sqrt(variance), maximum, minimum)

In [None]:
# Annual PV field albedo: the same fraction-based formula as for the campaigns,
# evaluated for every (doy, time) of the year and weighted by the meteo station SWin
print('Annual albedo calculation:')

# All data
df = load_data(data_fn)
# Shift the campaign timestamps by 15 min
# NOTE(review): presumably moves them to the middle of the half-hour period - confirm
df['DateTime'] = df['DateTime'] + pd.Timedelta('15min')
albedo_df = load_data(albedo_fn) # Dataset also contains shortwave & diffuse radiation
# Meteo station SWin
sw_df = load_data(sw_fn)

print('  - Extracting PV desert background albedo')
# Obtain desert soil albedo
pv_desert_albedo = get_diurnal_soil_albedo_annual(df)

# Desert reference: diurnal albedo weighted by the diurnal mean SWin of the desert campaigns
mean, std, maximum, minimum = weighted_avg_and_std(pv_desert_albedo['albedo_soil'], pv_desert_albedo['SWin'])
print('    - Mean: ', str(np.round(mean, 2)) + '±' + str(np.round(std, 3)))
print('    - Max.: ', str(np.round(maximum, 2)))
print('    - Min.: ', str(np.round(minimum, 2)))
mean_desert = mean
std_desert  = std

# Get shortwave radiation from meteo station, as DOY and time (required for weighted average)
print('  - Obtaining SWR from nearby meteo station (annual diurnal, based on 2018-2019)')
sw_df = annual_SWin(sw_df, 'SWin')
# Rename the campaign SWin so that it does not collide with the meteo station 'SWin'
pv_desert_albedo.rename({'SWin':'SWin_desert'}, axis=1, inplace=True)
# NOTE(review): 'time' of pv_desert_albedo comes from the shifted campaign timestamps, while
# sw_df was not shifted; the merge only matches if both use the same time-of-day grid - verify
pv_albedo = sw_df.merge(pv_desert_albedo, on=['time'], how='left')

print('  - Merging with PV field fractions, diffuse fraction & PV desert background albedo')
# Add PV field fractions
percentages = percentage_df.copy()
# Decimal hours -> 'HH:MM', so that the merge keys have the same format
percentages['time'] = percentages['time'].apply(lambda x: '{:02.0f}:{:02.0f}'.format(*divmod(x * 60, 60)))
# Percent -> fraction
percentages['shadow'] = percentages['shadow']*0.01
percentages['panel']  = percentages['panel']*0.01
percentages['soil']   = percentages['soil']*0.01
pv_albedo = pv_albedo.merge(percentages, on=['doy','time'], how='left')

print('  - Extracting diffuse fraction from nearby afforestation site')
# Get diffuse radiation from afforestation site
dif_df = get_diffuse_data_annual(albedo_df)

# Add diffuse fraction
pv_albedo = pv_albedo.merge(dif_df, on=['doy','time'], how='left')

# Calculate shade albedo
print('  - Calculate shade albedo')
pv_albedo['albedo_shade'] = pv_albedo['albedo_soil'] * pv_albedo['f_dif']

# Calculate PV field albedo based on percentages
print('  - Calculate PV field albedo')
pv_albedo['albedo'] = pv_albedo['albedo_soil'] * pv_albedo['soil'] \
                    + pv_albedo['albedo_shade'] * pv_albedo['shadow'] \
                    + albedo_pv * pv_albedo['panel']

# Save data
print('  - Save data to ' + out_pv_albedo_annual_fn)
pv_albedo.to_csv(out_pv_albedo_annual_fn, index=False)

# Calculate weighted mean
print('  - Calculate weighted mean')
mean, std, maximum, minimum = weighted_avg_and_std(pv_albedo['albedo'], pv_albedo['SWin'])
print('    - Mean: ', str(np.round(mean, 2)) + '±' + str(np.round(std, 3)))
print('    - Max.: ', str(np.round(maximum, 2)))
print('    - Min.: ', str(np.round(minimum, 2)))
mean_pv = mean
std_pv  = std

# Difference desert - PV field; the two standard deviations are combined in quadrature
print('  - Change in albedo from installing PV')
delta_albedo = mean_desert - mean_pv
std_delta_albedo = np.sqrt(std_desert**2 + std_pv**2)
print('    - Mean: ', str(np.round(delta_albedo, 2)) + '±' + str(np.round(std_delta_albedo, 3)))

print('Done...')