From 0875698c469ce76cb925a1a682e73afc515b7d7b Mon Sep 17 00:00:00 2001 From: saroele Date: Mon, 9 Apr 2018 21:16:08 +0200 Subject: [PATCH 01/11] Finalisation of #19 --- opengrid/library/analysis.py | 32 ++++++++++++++++++++++++++++++-- opengrid/tests/test_analyses.py | 11 +++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/opengrid/library/analysis.py b/opengrid/library/analysis.py index daea53a..172eb2e 100644 --- a/opengrid/library/analysis.py +++ b/opengrid/library/analysis.py @@ -9,6 +9,7 @@ import datetime as dt import pandas as pd import numpy as np +import numbers from opengrid.library.exceptions import EmptyDataFrame @@ -68,7 +69,7 @@ def do_analysis(self, agg, starttime=dt.time.min, endtime=dt.time.max): self.result = pd.DataFrame() -def standby(df, resolution='d'): +def standby(df, resolution='24h', time_window=None): """ Compute standby power @@ -76,11 +77,38 @@ def standby(df, resolution='d'): ---------- df : Pandas DataFrame Electricity Power - resolution : str + resolution : str, default='24h' + Resolution of the computation. Data will be resampled to this resolution by taking the minimum + of each period. + String that can be parsed by the pandas resample function, example ='h', '15min', '6h' + time_window : tuple with start-hour and end-hour, default=None + Specify the start-time and end-time for the analysis. + Only data within this time window will be considered. 
+ Both times have to be specified as string ('01:00', '06:30') or as datetime.time() objects + + Returns + ------- + df : pandas.Series with DateTimeIndex in the given resolution """ + def parse_time(t): + if isinstance(t, numbers.Number): + return pd.Timestamp.utcfromtimestamp(t).time() + else: + return pd.Timestamp(t).time() + if df.empty: raise EmptyDataFrame() + # first filter based on the time-window + if time_window is not None: + t_start = parse_time(time_window[0]) + t_end = parse_time(time_window[1]) + if t_start > t_end: + # start before midnight + df = df[(df.index.time >= t_start) | (df.index.time < t_end)] + else: + df = df[(df.index.time >= t_start) & (df.index.time < t_end)] + return df.resample(resolution).min() diff --git a/opengrid/tests/test_analyses.py b/opengrid/tests/test_analyses.py index 4fc903e..b75f4f2 100644 --- a/opengrid/tests/test_analyses.py +++ b/opengrid/tests/test_analyses.py @@ -22,6 +22,17 @@ def test_standby(self): self.assertRaises(EmptyDataFrame, og.analysis.standby, pd.DataFrame) + def test_standby_with_time_window(self): + df = datasets.get('elec_power_min_1sensor') + res = og.analysis.standby(df, 'D', time_window=('01:00', '06:00')) + self.assertEqual(res.index.tz.zone, 'Europe/Brussels') + self.assertEqual(res.to_json(), '{"1507327200000":61.739999936,"1507413600000":214.9799999222,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":109.5599999931,"1508536800000":144.3600001093,"1508623200000":52.7999997279}') + + res = og.analysis.standby(df, 'D', time_window=('22:00', '06:00')) + self.assertEqual(res.index.tz.zone, 'Europe/Brussels') + self.assertEqual(res.to_json(), 
'{"1507327200000":61.739999936,"1507413600000":119.2800000636,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":96.3000000408,"1508536800000":133.9200000744,"1508623200000":52.7999997279}') + + def test_count_peaks(self): df = datasets.get('gas_dec2016_min') ts = df['313b'].head(100) From 260e5a0cc22163ac07c9eea6c3c663bc996b4229 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 9 Apr 2018 21:56:24 +0200 Subject: [PATCH 02/11] load factor analysis --- opengrid/library/analysis.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/opengrid/library/analysis.py b/opengrid/library/analysis.py index daea53a..bb4b98b 100644 --- a/opengrid/library/analysis.py +++ b/opengrid/library/analysis.py @@ -105,3 +105,33 @@ def count_peaks(ts): result = on_toggles & shifted count = result.sum() return count + + +def load_factor(ts, resolution=None, norm=None): + """ + Calculate the ratio of input vs. norm over a given interval. 
+ + Parameters + ---------- + ts : Pandas Series + timeseries + resolution : str, optional + interval over which to calculate the ratio + default: resolution of the input timeseries + norm : int | float, optional + denominator of the ratio + default: the maximum of the input timeseries + + Returns + ------- + Pandas Series + """ + if norm is None: + norm = ts.max() + + if resolution is not None: + ts = ts.resample(rule=resolution).mean() + + lf = ts/norm + + return lf From a9c8b8faefd542fafdd37f0648c09fac77397811 Mon Sep 17 00:00:00 2001 From: saroele Date: Mon, 9 Apr 2018 22:17:12 +0200 Subject: [PATCH 03/11] Proposed solution for #20 --- opengrid/library/analysis.py | 30 ++++++++++++++++++++++++++++++ opengrid/tests/test_analyses.py | 14 ++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/opengrid/library/analysis.py b/opengrid/library/analysis.py index 172eb2e..9986468 100644 --- a/opengrid/library/analysis.py +++ b/opengrid/library/analysis.py @@ -112,6 +112,36 @@ def parse_time(t): return df.resample(resolution).min() +def share_of_standby(df, resolution='24h', time_window=None): + """ + Compute the share of the standby power in the total consumption. + + Parameters + ---------- + df : Pandas DataFrame + Power (typically electricity, can be anything) + resolution : str, default='24h' + Resolution of the computation. Data will be resampled to this resolution (as mean) before computation + of the minimum. + String that can be parsed by the pandas resample function, example ='h', '15min', '6h' + time_window : tuple with start-hour and end-hour, default=None + Specify the start-time and end-time for the analysis. + Only data within this time window will be considered. 
+ Both times have to be specified as string ('01:00', '06:30') or as datetime.time() objects + + Returns + ------- + fraction : float between 0-1 with the share of the standby consumption + """ + + p_sb = standby(df, resolution, time_window) + df_resampled = df.resample(resolution).mean() + p_tot = df_resampled.sum() + p_standby = p_sb.sum() + share_standby = p_standby/p_tot + return share_standby.iloc[0] + + def count_peaks(ts): """ Toggle counter for gas boilers diff --git a/opengrid/tests/test_analyses.py b/opengrid/tests/test_analyses.py index b75f4f2..cdf4997 100644 --- a/opengrid/tests/test_analyses.py +++ b/opengrid/tests/test_analyses.py @@ -7,6 +7,7 @@ import unittest import pandas as pd +import numpy as np import opengrid as og from opengrid import datasets @@ -32,6 +33,19 @@ def test_standby_with_time_window(self): self.assertEqual(res.index.tz.zone, 'Europe/Brussels') self.assertEqual(res.to_json(), '{"1507327200000":61.739999936,"1507413600000":119.2800000636,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":96.3000000408,"1508536800000":133.9200000744,"1508623200000":52.7999997279}') + def test_share_of_standby_1(self): + df = pd.DataFrame(data={'conso':np.ones(48)}, + index=pd.DatetimeIndex(start=pd.Timestamp('20180304'), periods=48, freq='h')) + share_of_standby = og.analysis.share_of_standby(df, resolution='24h') + self.assertEqual(share_of_standby, 1.0) + + def test_share_of_standby_2(self): + df = pd.DataFrame(data={'conso':np.ones(48)}, + index=pd.DatetimeIndex(start=pd.Timestamp('20180304'), periods=48, freq='h')) + df.iloc[0,0] = 0 + share_of_standby = og.analysis.share_of_standby(df, resolution='24h') + self.assertAlmostEqual(share_of_standby, 
0.5106382978723404) + def test_count_peaks(self): df = datasets.get('gas_dec2016_min') From eb83cbd5266919afe53947b9bf02c4e4e77be316 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Mon, 9 Apr 2018 22:24:42 +0200 Subject: [PATCH 04/11] test load factor analysis --- opengrid/tests/test_analyses.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/opengrid/tests/test_analyses.py b/opengrid/tests/test_analyses.py index 4fc903e..e5568ba 100644 --- a/opengrid/tests/test_analyses.py +++ b/opengrid/tests/test_analyses.py @@ -28,6 +28,17 @@ def test_count_peaks(self): count = og.analysis.count_peaks(ts) self.assertEqual(count, 13) + def test_load_factor(self): + ts = og.datasets.get('electricity_2016_hour') + ts = ts['e1de'].truncate(after=pd.Timestamp('20160107')) + lf1 = og.analysis.load_factor(ts) + self.assertIsInstance(ts, pd.Series) + self.assertAlmostEqual(ts.iloc[0], (lf1 * ts.max()).iloc[0]) + + lf2 = og.analysis.load_factor(ts, resolution='3h', norm=800) + self.assertIsInstance(ts, pd.Series) + self.assertAlmostEqual(175.0345212009457, (lf2 * 800).iloc[0]) + if __name__ == '__main__': unittest.main() From 3f40f0c06bbb5897989fde6f85ed9719a2190f6c Mon Sep 17 00:00:00 2001 From: saroele Date: Mon, 9 Apr 2018 22:31:01 +0200 Subject: [PATCH 05/11] Ensure dataframe at the entry of share_of_standby --- opengrid/library/analysis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/opengrid/library/analysis.py b/opengrid/library/analysis.py index 9986468..bfbfc30 100644 --- a/opengrid/library/analysis.py +++ b/opengrid/library/analysis.py @@ -134,8 +134,9 @@ def share_of_standby(df, resolution='24h', time_window=None): fraction : float between 0-1 with the share of the standby consumption """ - p_sb = standby(df, resolution, time_window) - df_resampled = df.resample(resolution).mean() + df_ = pd.DataFrame(df) + p_sb = standby(df_, resolution, time_window) + df_resampled = df_.resample(resolution).mean() p_tot = df_resampled.sum() 
p_standby = p_sb.sum() share_standby = p_standby/p_tot From f6763f5a077c361cae81b56db7dfc07561c4c154 Mon Sep 17 00:00:00 2001 From: maxhelskens Date: Thu, 12 Apr 2018 09:38:58 +0200 Subject: [PATCH 06/11] finished boxplot --- .gitignore | 3 +- opengrid/library/plotting.py | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1324af9..6ad77c4 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ opengrid/notebooks/index.html dist/ build/ -opengrid.egg-info/ \ No newline at end of file +opengrid.egg-info/ +.DS_Store diff --git a/opengrid/library/plotting.py b/opengrid/library/plotting.py index 722d65c..ac32d6b 100644 --- a/opengrid/library/plotting.py +++ b/opengrid/library/plotting.py @@ -1,5 +1,6 @@ import matplotlib.pyplot as plt import matplotlib +import pandas as pd def plot_style(): @@ -13,4 +14,57 @@ def plot_style(): matplotlib.style.use('seaborn-deep') plt.rcParams['figure.figsize'] = 16, 6 + + # To overrule the legend style + plt.rcParams['legend.facecolor'] = "#ffffff" + plt.rcParams['legend.frameon'] = True + plt.rcParams['legend.framealpha'] = 1 + return plt + + +def boxplot(df, plot_mean=False, plot_ids=None): + """ + Plot boxplots + + Plot the boxplots of a dataframe in time + + Parameters + ---------- + df: Pandas Dataframe + Every collumn is a timeseries + plot_mean: bool + Wether or not to plot the means + plot_ids: [str] + List of id's to plot + + Returns + ------- + matplotlib figure + """ + description = df.apply(pd.DataFrame.describe, axis=1) + + # plot + plt = plot_style() + + df.index = df.index.map(lambda x: x.strftime('%b')) + + df = df.T + + fig, ax = plt.subplots() + axes, bp = df.boxplot(ax=ax, return_type='both') + plt.setp(bp['boxes'], color='black') + plt.setp(bp['whiskers'], color='black') + + for id in plot_ids: + ax.scatter(x=axes.get_xticks(), y=df.loc[id], label=str(id)) + + if plot_mean: + ax.scatter(x=axes.get_xticks(), y=description['mean'], 
label="Mean", color='k', s=30, marker='+') + + plt.xticks(rotation=45) + + ax.legend() + + return fig + From af9a24be0a5a44c6d0289193d7cb58be21c74113 Mon Sep 17 00:00:00 2001 From: saroele Date: Thu, 12 Apr 2018 10:18:17 +0200 Subject: [PATCH 07/11] Bugfix plotting --- opengrid/library/plotting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/opengrid/library/plotting.py b/opengrid/library/plotting.py index 8e4b3f1..c5785d6 100644 --- a/opengrid/library/plotting.py +++ b/opengrid/library/plotting.py @@ -1,3 +1,4 @@ +import os import matplotlib.pyplot as plt import matplotlib import pandas as pd From 387bd3d726c3fe55d7d2e168d58b45b743d53475 Mon Sep 17 00:00:00 2001 From: maxhelskens Date: Thu, 12 Apr 2018 11:14:10 +0200 Subject: [PATCH 08/11] Updated boxplot function --- opengrid/library/plotting.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/opengrid/library/plotting.py b/opengrid/library/plotting.py index c5785d6..4cd6cfb 100644 --- a/opengrid/library/plotting.py +++ b/opengrid/library/plotting.py @@ -145,24 +145,22 @@ def boxplot(df, plot_mean=False, plot_ids=None): # plot plt = plot_style() - df.index = df.index.map(lambda x: x.strftime('%b')) - df = df.T - fig, ax = plt.subplots() - axes, bp = df.boxplot(ax=ax, return_type='both') + axes, bp = df.boxplot(return_type='both') plt.setp(bp['boxes'], color='black') plt.setp(bp['whiskers'], color='black') for id in plot_ids: - ax.scatter(x=axes.get_xticks(), y=df.loc[id], label=str(id)) + if id in df.index: + plt.scatter(x=axes.get_xticks(), y=df.loc[id], label=str(id)) if plot_mean: - ax.scatter(x=axes.get_xticks(), y=description['mean'], label="Mean", color='k', s=30, marker='+') + plt.scatter(x=axes.get_xticks(), y=description['mean'], label="Mean", color='k', s=30, marker='+') plt.xticks(rotation=45) - ax.legend() + plt.legend() - return fig + return plt.gcf() From 65b1e26c8246d79994c2077283167a351d414550 Mon Sep 17 00:00:00 2001 From: saroele Date: Thu, 12 Apr 2018 
17:27:32 +0200 Subject: [PATCH 09/11] Bugfixes and improvements in the plotting --- opengrid/library/plotting.py | 67 ++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/opengrid/library/plotting.py b/opengrid/library/plotting.py index 4cd6cfb..a00da68 100644 --- a/opengrid/library/plotting.py +++ b/opengrid/library/plotting.py @@ -2,6 +2,7 @@ import matplotlib.pyplot as plt import matplotlib import pandas as pd +import numpy as np import matplotlib.cm as cm from matplotlib.dates import date2num, num2date, HourLocator, DayLocator, AutoDateLocator, DateFormatter from matplotlib.colors import LogNorm @@ -121,46 +122,54 @@ def carpet(timeseries, **kwargs): return im -def boxplot(df, plot_mean=False, plot_ids=None): +def boxplot(df, plot_mean=False, plot_ids=None, title=None, xlabel=None, ylabel=None): """ - Plot boxplots - - Plot the boxplots of a dataframe in time - - Parameters - ---------- - df: Pandas Dataframe - Every collumn is a timeseries - plot_mean: bool - Wether or not to plot the means - plot_ids: [str] - List of id's to plot - - Returns - ------- - matplotlib figure - """ + Plot boxplots + + Plot the boxplots of a dataframe in time + + Parameters + ---------- + df: Pandas Dataframe + Every column is a timeseries + plot_mean: bool + Whether or not to plot the means + plot_ids: [str] + List of id's to plot + + Returns + ------- + matplotlib figure + """ + + df = df.applymap(float) description = df.apply(pd.DataFrame.describe, axis=1) # plot plt = plot_style() - df = df.T - - axes, bp = df.boxplot(return_type='both') - plt.setp(bp['boxes'], color='black') - plt.setp(bp['whiskers'], color='black') - - for id in plot_ids: - if id in df.index: - plt.scatter(x=axes.get_xticks(), y=df.loc[id], label=str(id)) + plt.boxplot(df) + #plt.setp(bp['boxes'], color='black') + #plt.setp(bp['whiskers'], color='black') + if plot_ids is not None: + for id in plot_ids: + if id in df.columns: + plt.scatter(x=range(1, len(df) + 1), 
y=df[id], label=str(id)) if plot_mean: - plt.scatter(x=axes.get_xticks(), y=description['mean'], label="Mean", color='k', s=30, marker='+') + plt.scatter(x=range(1, len(df) + 1), y=description['mean'], label="Mean", color='k', s=30, marker='+') - plt.xticks(rotation=45) + ax = plt.gca() + ax.set_xticklabels(df.index) + #plt.xticks(rotation=45) plt.legend() + if title is not None: + plt.title(title) + if xlabel is not None: + plt.xlabel(xlabel) + if ylabel is not None: + plt.ylabel(ylabel) return plt.gcf() From 98831c8b34afc28cfd2fbf6cbcbfa03a17d1e0bd Mon Sep 17 00:00:00 2001 From: maxhelskens Date: Mon, 23 Apr 2018 20:55:11 +0200 Subject: [PATCH 10/11] unit test boxplot --- opengrid/library/plotting.py | 5 ++++- opengrid/tests/test_plotting.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/opengrid/library/plotting.py b/opengrid/library/plotting.py index a00da68..4136e06 100644 --- a/opengrid/library/plotting.py +++ b/opengrid/library/plotting.py @@ -1,9 +1,12 @@ import os -import matplotlib.pyplot as plt +import os +import numpy as np +import pandas as pd import matplotlib import pandas as pd import numpy as np import matplotlib.cm as cm +import matplotlib.pyplot as plt from matplotlib.dates import date2num, num2date, HourLocator, DayLocator, AutoDateLocator, DateFormatter from matplotlib.colors import LogNorm diff --git a/opengrid/tests/test_plotting.py b/opengrid/tests/test_plotting.py index 2cde7a4..ab00fcd 100644 --- a/opengrid/tests/test_plotting.py +++ b/opengrid/tests/test_plotting.py @@ -18,13 +18,22 @@ def test_default(self): class CarpetTest(unittest.TestCase): def test_default(self): import numpy as np - index = pd.date_range('2015-1-1', '2015-12-31', freq='h') + index = pd.date_range('2015-1-1', '2015-2-1', freq='h') ser = pd.Series(np.random.normal(size=len(index)), index=index, name='abc') assert plotting.carpet(ser) is not None def test_empty(self): assert plotting.carpet(pd.Series(index=list('abc'))) is None +class 
BoxplotTest(unittest.TestCase): + def test_default(self): + import numpy as np + import pandas as pd + from opengrid.library import plotting + index = pd.date_range('2015-1-1', '2015-2-1', freq='d') + df = pd.DataFrame(index=index, data=np.random.randint(5, size=(len(index),20))) + plotting.boxplot(df) + if __name__ == '__main__': unittest.main() From b0860f74536b0d62a1dcc9cacc843ce41661dc8c Mon Sep 17 00:00:00 2001 From: maxhelskens Date: Mon, 23 Apr 2018 21:08:02 +0200 Subject: [PATCH 11/11] Unit test without defaults --- opengrid/tests/test_plotting.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opengrid/tests/test_plotting.py b/opengrid/tests/test_plotting.py index ab00fcd..fd9b182 100644 --- a/opengrid/tests/test_plotting.py +++ b/opengrid/tests/test_plotting.py @@ -34,6 +34,14 @@ def test_default(self): df = pd.DataFrame(index=index, data=np.random.randint(5, size=(len(index),20))) plotting.boxplot(df) + def test_arguments(self): + import numpy as np + import pandas as pd + from opengrid.library import plotting + index = pd.date_range('2015-1-1', '2015-2-1', freq='d') + df = pd.DataFrame(index=index, data=np.random.randint(5, size=(len(index),20))) + plotting.boxplot(df, plot_mean=True, plot_ids=[2, 3], title="Title", xlabel="xlable", ylabel="ylable") + if __name__ == '__main__': unittest.main()