In [4]:
import pandas as pd
import numpy as np
import random
from datetime import timedelta
from entropy_over_time import SaE_over_time, SpE_over_time, PE_over_time, WPE_over_time, DE_over_time

In [5]:
# Load the synthetic rooftop PV dataset.
# Note: results produced with this dummy dataset are likely to differ from the ones
# shown in the research article.

input_df = pd.read_csv("Input_data/synthetic_rooftop_data.gz", compression='gzip')

# Parse the timestamp column first, then remove it from the data columns and use the
# parsed values as the row index (the index keeps the column's name).
timestamps = pd.to_datetime(input_df["t_stamp_utc"])
input_df = input_df.drop(columns=["t_stamp_utc"])
input_df.index = timestamps

# Every remaining column is treated as one site.
allsites = input_df.columns.tolist()

In [None]:
# Pick 3 rooftop PV systems at random; their entropy over time is calculated further below.

selected_site_id = {}
num_to_select = 3

# random.sample draws without replacement, so the chosen sites are distinct columns.
list_of_random_items = random.sample(allsites, num_to_select)

# Name the three picks individually ...
first_random_system, second_random_system, third_random_system = list_of_random_items[:3]

# ... and pull out the matching column of the dataset for each of them.
series1, series2, series3 = (input_df[site] for site in list_of_random_items[:3])

In [None]:
# Add two synthetic reference signals to the dataset: Gaussian white noise and a sine wave.
# Both get one sample per row of input_df.

sample_number = input_df.shape[0]

# Parameters of the white noise: centred on `amplitude`, standard deviation of a third of it.
amplitude = 100
mean, std = amplitude, amplitude/3

# Number of samples per day, derived from the median spacing between consecutive timestamps.
# One sine cycle spans exactly this many samples, i.e. one day.
sampling_interval = input_df.index.to_series().diff().median()
samples_in_one_day = timedelta(days=1)/sampling_interval

sample_positions = np.arange(sample_number)

input_df['WhiteNoise'] = np.random.normal(loc=mean, scale=std, size=sample_number)
input_df['SineWave'] = np.sin(2 * np.pi * 1 * sample_positions / samples_in_one_day)

In [None]:
# This cell calculates five entropy measures (SaE, SpE, PE, WPE, DE) over rolling 60-day windows
# for the three selected PV generation time-series, the white noise and the sine wave.

window_len = timedelta(days=60)              # length of rolling windows
series_length = timedelta(days=364)          # length of the time series
rolling_step = 1 * int(samples_in_one_day)   # one day, expressed in samples

# Series to analyse, in the order in which they become columns of the result data-frames.
signals = [
    (first_random_system, series1),
    (second_random_system, series2),
    (third_random_system, series3),
    ('WhiteNoise', input_df['WhiteNoise']),
    ('SineWave', input_df['SineWave']),
]

# One result dictionary per entropy measure, keyed by series label.
WPE_dict, PE_dict, DE_dict, SpE_dict, SaE_dict = {}, {}, {}, {}, {}

for label, signal in signals:
    SaE_dict[label] = SaE_over_time(signal, series_length, window_len, step_size=rolling_step, order=3)
print("All Sample Entropy values are calculated!")

for label, signal in signals:
    SpE_dict[label] = SpE_over_time(signal, series_length, window_len, step_size=rolling_step)
print("All Spectral Entropy values are calculated!")

for label, signal in signals:
    PE_dict[label] = PE_over_time(signal, series_length, window_len, step_size=rolling_step, dimension=6)
print("All Permutation Entropy values are calculated!")

for label, signal in signals:
    WPE_dict[label] = WPE_over_time(signal, series_length, window_len, step_size=rolling_step, dimension=6)
print("All Weighted Permutation Entropy values are calculated!")

for label, signal in signals:
    DE_dict[label] = DE_over_time(signal, series_length, window_len, step_size=rolling_step, dimension=5, cls=5)
print("All Dispersion Entropy values are calculated!")

In [None]:
# This cell creates the index list for the final data-frame.
#
# Every timestamp of the dataset is shifted forward by one window length; of those shifted
# timestamps, every `rolling_step`-th one (starting with the first) is kept, stopping before
# the first one that reaches `input_df.index[0] + series_length`.
#
# The stop test is ">=" rather than "==": with an exact-equality test the loop would never
# stop if that precise timestamp is missing from the data (gaps, irregular sampling), and the
# list would silently run to the end of the dataset. This assumes the index is sorted in time.

window_ends = input_df.index + window_len
IndexList = list()

if len(window_ends) > 0:
    series_end = input_df.index[0] + series_length

    # Position of the first shifted timestamp at or beyond the end of the analysed period.
    past_end = np.flatnonzero(window_ends >= series_end)
    stop_position = int(past_end[0]) if past_end.size else len(window_ends)

    # Positions 0, rolling_step, 2*rolling_step, ... strictly before the stop position.
    IndexList = list(window_ends[:stop_position:rolling_step])

In [None]:
# This cell turns each dictionary of entropy values (white noise, sine wave and PV systems)
# into a data-frame indexed by the timestamps in IndexList.

def _entropy_frame(entropy_by_series):
    """Return a data-frame with one column per series label, indexed by IndexList.

    IndexList is attached through a temporary 'newcol' column, converted to datetimes for
    the index, and then removed again; the resulting index is therefore named 'newcol'.
    The number of values per series must equal len(IndexList), otherwise pandas raises.
    """
    frame = pd.DataFrame(entropy_by_series)
    frame['newcol'] = IndexList
    frame.index = pd.to_datetime(frame['newcol'])
    return frame.drop(columns=['newcol'])


WPE_df = _entropy_frame(WPE_dict)
PE_df = _entropy_frame(PE_dict)
DE_df = _entropy_frame(DE_dict)
SpE_df = _entropy_frame(SpE_dict)
SaE_df = _entropy_frame(SaE_dict)

# One might like to save the generated results:
#WPE_df.to_pickle("SinevsWNvsPVGen_WPE61.pkl")
#PE_df.to_pickle("SinevsWNvsPVGen_PE61.pkl")
#DE_df.to_pickle("SinevsWNvsPVGen_DE55.pkl")
#SpE_df.to_pickle("SinevsWNvsPVGen_SpE.pkl")
#SaE_df.to_pickle("SinevsWNvsPVGen_SaE3.pkl")

In [None]:
# This cell reads the pre-generated dataframes of each set of entropy values over time for white
# noise, sine wave, and PV systems (produced using the original dataset, which could not be shared).
#
# When all five pickle files are present they REPLACE the data-frames calculated in the cells
# above. When any of them is missing, the calculated data-frames are kept, so that the notebook
# still runs from top to bottom with only the synthetic dataset available.

import os

pregenerated_pickles = {
    "WPE_df": "SinevsWNvsPVGen_WPE61.pkl",
    "PE_df": "SinevsWNvsPVGen_PE61.pkl",
    "DE_df": "SinevsWNvsPVGen_DE55.pkl",
    "SpE_df": "SinevsWNvsPVGen_SpE.pkl",
    "SaE_df": "SinevsWNvsPVGen_SaE3.pkl",
}
missing_pickles = [path for path in pregenerated_pickles.values() if not os.path.exists(path)]

if missing_pickles:
    # Load all or nothing: mixing pre-generated and freshly calculated frames in one figure
    # would be misleading.
    print("Pre-generated results not found (" + ", ".join(missing_pickles)
          + "); keeping the entropy data-frames calculated above.")
else:
    # NOTE(review): unpickling can execute arbitrary code; only load pickle files that come
    # from a trusted source.
    WPE_df = pd.read_pickle(pregenerated_pickles["WPE_df"])
    PE_df = pd.read_pickle(pregenerated_pickles["PE_df"])
    DE_df = pd.read_pickle(pregenerated_pickles["DE_df"])
    SpE_df = pd.read_pickle(pregenerated_pickles["SpE_df"])
    SaE_df = pd.read_pickle(pregenerated_pickles["SaE_df"])

In [None]:
# This cell plots the entropy values of 3 time series against those of a sine wave and a white noise (Figure 1)
#
# The figure has five stacked panels (A-E), one per entropy measure, sharing the same layout.
# Only the bottom panel shows x tick labels.

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

plt.style.use('default')
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Arial']
plt.rcParams['font.size'] = 7
plt.rcParams['figure.dpi'] = 400
plt.rcParams['savefig.dpi'] = 400

# One line style / colour per column, in column order: three PV systems, white noise, sine wave.
cycler = plt.cycler(linestyle=['dashdot', 'dotted', 'dashed', (0, (1, 1)), 'solid'],
                    color=['darkorange', 'magenta','limegreen', 'darkred', 'darkgreen'])


def _odd_month_label(tick_value, _position=None):
    """Format a date tick as e.g. 'Mar-19' for odd-numbered months and as '' otherwise.

    Labelling every other month keeps the axis readable. Deriving the text from the tick's
    own date (instead of a fixed list of strings) keeps the labels correct for any dataset.
    """
    tick_date = mdates.num2date(tick_value)
    return tick_date.strftime('%b-%y') if tick_date.month % 2 == 1 else ''


# (panel caption, y-axis label, data-frame) from top to bottom.
panels = [
    ('A', 'DE', DE_df),
    ('B', 'PE', PE_df),
    ('C', 'SaE', SaE_df),
    ('D', 'SpE', SpE_df),
    ('E', 'WPE', WPE_df),
]
last_row = len(panels) - 1

plt.figure(figsize=(3.4, 7.2))

for row, (caption, entropy_name, entropy_df) in enumerate(panels):
    ax = plt.subplot2grid((len(panels), 1), (row, 0))
    ax.set_prop_cycle(cycler)
    ax.plot(entropy_df, linewidth=0.75)

    if row == 0:
        # The legend is shown once, in the top panel; labels follow the column order.
        ax.legend(["PV generation 1", "PV generation 2", "PV generation 3",
                   "White Gaussian noise", "Sine wave signal"], ncol=2,  fontsize=6)

    ax.set_ylabel(entropy_name)
    ax.minorticks_off()

    # One major tick (and grid line) per month in every panel, so the panels line up.
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    if row == last_row:
        ax.xaxis.set_major_formatter(FuncFormatter(_odd_month_label))
        ax.set_xlabel('Time, Month')
    else:
        ax.tick_params(axis='x', which='both', labelbottom=False)

    ax.grid(color='gainsboro', linestyle='--', linewidth=0.4, alpha=0.5)

    # for captions: place the panel letter left of the axes, just below its top edge
    # (position expressed in data coordinates).
    x0, xmax = ax.get_xlim()
    y0, ymax = ax.get_ylim()
    data_width = xmax - x0
    data_height = ymax - y0
    ax.text(x0 - data_width * 0.17, ymax - data_height*0.01, caption, weight='bold',  fontsize=9)

plt.subplots_adjust(wspace=0, hspace=0.13)
# To save the figure, uncomment the next line; it is placed before plt.show() because the
# figure may no longer be available for saving once it has been shown.
#plt.savefig('Figure1.pdf', dpi = 400, bbox_inches='tight')
plt.show()