Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

finished boxplot #42

Merged
merged 14 commits into from
Apr 26, 2018
Merged
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
opengrid/notebooks/index.html
dist/
build/
opengrid.egg-info/
opengrid.egg-info/
.DS_Store
63 changes: 61 additions & 2 deletions opengrid/library/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import datetime as dt
import pandas as pd
import numpy as np
import numbers
from opengrid.library.exceptions import EmptyDataFrame


Expand Down Expand Up @@ -68,22 +69,80 @@ def do_analysis(self, agg, starttime=dt.time.min, endtime=dt.time.max):
self.result = pd.DataFrame()


def standby(df, resolution='d'):
def standby(df, resolution='24h', time_window=None):
    """
    Compute standby power

    The standby power is the minimum of the data within every period of
    length ``resolution``, optionally restricted to a daily time window.

    Parameters
    ----------
    df : Pandas DataFrame
        Electricity Power
    resolution : str, default='24h'
        Resolution of the computation. The minimum is computed over every period of this length.
        String that can be parsed by the pandas resample function, example ='h', '15min', '6h'
    time_window : tuple with start-time and end-time, default=None
        Specify the start-time and end-time for the analysis.
        Only data within this time window will be considered (start inclusive, end exclusive).
        Both times can be specified as string ('01:00', '06:30'), as datetime.time() objects
        or as a number of seconds since the epoch (the UTC time of day is used).
        If the start-time is later than the end-time, the window spans midnight.

    Returns
    -------
    df : pandas object of the same type as the input, indexed in the given resolution

    Raises
    ------
    EmptyDataFrame
        If ``df`` is empty
    """

    def parse_time(t):
        # datetime.time objects are documented as valid input, but
        # pd.Timestamp() cannot convert them, so pass them through as-is
        if isinstance(t, dt.time):
            return t
        if isinstance(t, numbers.Number):
            # epoch seconds, interpreted in UTC
            return pd.Timestamp(t, unit='s').time()
        return pd.Timestamp(t).time()

    if df.empty:
        raise EmptyDataFrame()

    # first filter based on the time-window
    if time_window is not None:
        t_start = parse_time(time_window[0])
        t_end = parse_time(time_window[1])
        times = df.index.time
        if t_start > t_end:
            # window spans midnight: keep the late evening OR the early morning
            in_window = (times >= t_start) | (times < t_end)
        else:
            in_window = (times >= t_start) & (times < t_end)
        df = df[in_window]

    return df.resample(resolution).min()


def share_of_standby(df, resolution='24h', time_window=None):
    """
    Compute the share of the standby power in the total consumption.

    The standby power per period (see ``standby``) is summed and divided by
    the sum of the per-period mean power of the complete dataset.

    Parameters
    ----------
    df : Pandas DataFrame
        Power (typically electricity, can be anything)
    resolution : str, default='24h'
        Length of the periods over which the standby power (minimum) and the
        mean power are computed.
        String that can be parsed by the pandas resample function, example ='h', '15min', '6h'
    time_window : tuple with start-hour and end-hour, default=None
        Passed on to ``standby``: only data within this time window is
        considered for the standby power. The total is always computed on all data.

    Returns
    -------
    fraction : float, share of the standby consumption for the first column of the data
    """

    frame = pd.DataFrame(df)

    # standby is evaluated first so that its empty-input check runs before any resampling
    standby_total = standby(frame, resolution, time_window).sum()
    overall_total = frame.resample(resolution).mean().sum()

    ratio = standby_total / overall_total
    return ratio.iloc[0]


def count_peaks(ts):
"""
Toggle counter for gas boilers
Expand Down
62 changes: 62 additions & 0 deletions opengrid/library/plotting.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
import os
import numpy as np
import pandas as pd
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, num2date, HourLocator, DayLocator, AutoDateLocator, DateFormatter
Expand All @@ -19,6 +22,12 @@ def plot_style():
matplotlib.style.use('seaborn-deep')

plt.rcParams['figure.figsize'] = 16, 6

# To overrule the legend style
plt.rcParams['legend.facecolor'] = "#ffffff"
plt.rcParams['legend.frameon'] = True
plt.rcParams['legend.framealpha'] = 1

return plt


Expand Down Expand Up @@ -114,3 +123,56 @@ def carpet(timeseries, **kwargs):
plt.title(title)

return im


def boxplot(df, plot_mean=False, plot_ids=None, title=None, xlabel=None, ylabel=None):
    """
    Plot boxplots

    Plot the boxplots of a dataframe in time: one box is drawn for every row
    (timestamp) of the dataframe, showing the spread of the values over all columns.

    Parameters
    ----------
    df: Pandas Dataframe
        Every column is a timeseries
    plot_mean: bool
        Whether or not to plot the means (per row) on top of the boxes
    plot_ids: [str]
        List of id's (column names) to plot as individual points.
        Id's that are not a column of df are skipped.
    title: str, optional
        Title of the plot
    xlabel: str, optional
        Label of the x-axis
    ylabel: str, optional
        Label of the y-axis

    Returns
    -------
    matplotlib figure
    """

    df = df.astype(float)

    # plot_style() applies the opengrid style and hands back the pyplot module
    plt = plot_style()

    # Pass an explicit 2D array with one column per timestamp: for an ndarray
    # matplotlib draws one box per column, so this gives one box per row of df,
    # matching the x-positions and tick labels used below.
    plt.boxplot(df.values.T)

    positions = range(1, len(df) + 1)
    has_labelled_artists = False

    if plot_ids is not None:
        for column in plot_ids:
            if column in df.columns:
                plt.scatter(x=positions, y=df[column], label=str(column))
                has_labelled_artists = True

    if plot_mean:
        plt.scatter(x=positions, y=df.mean(axis=1), label="Mean", color='k', s=30, marker='+')
        has_labelled_artists = True

    ax = plt.gca()
    ax.set_xticklabels(df.index)

    # an empty legend only triggers a matplotlib warning
    if has_labelled_artists:
        plt.legend()
    if title is not None:
        plt.title(title)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)

    return plt.gcf()

25 changes: 25 additions & 0 deletions opengrid/tests/test_analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import unittest
import pandas as pd
import numpy as np

import opengrid as og
from opengrid import datasets
Expand All @@ -22,6 +23,30 @@ def test_standby(self):

self.assertRaises(EmptyDataFrame, og.analysis.standby, pd.DataFrame)

def test_standby_with_time_window(self):
    """Standby within a time window keeps the timezone and matches the reference values."""
    df = datasets.get('elec_power_min_1sensor')

    # (time window, expected JSON of the result); the second window spans midnight
    cases = [
        (
            ('01:00', '06:00'),
            '{"1507327200000":61.739999936,"1507413600000":214.9799999222,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":109.5599999931,"1508536800000":144.3600001093,"1508623200000":52.7999997279}',
        ),
        (
            ('22:00', '06:00'),
            '{"1507327200000":61.739999936,"1507413600000":119.2800000636,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":96.3000000408,"1508536800000":133.9200000744,"1508623200000":52.7999997279}',
        ),
    ]

    for window, expected_json in cases:
        res = og.analysis.standby(df, 'D', time_window=window)
        self.assertEqual(res.index.tz.zone, 'Europe/Brussels')
        self.assertEqual(res.to_json(), expected_json)

def test_share_of_standby_1(self):
    """A constant consumption consists entirely of standby power."""
    # pd.date_range replaces the DatetimeIndex(start=..., periods=...) constructor,
    # which is no longer available in pandas >= 1.0
    index = pd.date_range(start=pd.Timestamp('20180304'), periods=48, freq='h')
    df = pd.DataFrame(data={'conso': np.ones(48)}, index=index)
    share_of_standby = og.analysis.share_of_standby(df, resolution='24h')
    self.assertEqual(share_of_standby, 1.0)

def test_share_of_standby_2(self):
    """A single zero sample pulls the standby of the first day down to zero."""
    # pd.date_range replaces the DatetimeIndex(start=..., periods=...) constructor,
    # which is no longer available in pandas >= 1.0
    index = pd.date_range(start=pd.Timestamp('20180304'), periods=48, freq='h')
    df = pd.DataFrame(data={'conso': np.ones(48)}, index=index)
    df.iloc[0, 0] = 0
    share_of_standby = og.analysis.share_of_standby(df, resolution='24h')
    # standby: 0 (day 1) + 1 (day 2); total: 23/24 + 1  ->  24/47
    self.assertAlmostEqual(share_of_standby, 0.5106382978723404)


def test_count_peaks(self):
df = datasets.get('gas_dec2016_min')
ts = df['313b'].head(100)
Expand Down
19 changes: 18 additions & 1 deletion opengrid/tests/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,30 @@ def test_default(self):
class CarpetTest(unittest.TestCase):
    """Smoke tests for plotting.carpet."""

    def test_default(self):
        """One month of hourly random data gives a result that is not None."""
        import numpy as np
        index = pd.date_range('2015-1-1', '2015-2-1', freq='h')
        ser = pd.Series(np.random.normal(size=len(index)), index=index, name='abc')
        # unittest assertions are not stripped when running with -O
        self.assertIsNotNone(plotting.carpet(ser))

    def test_empty(self):
        """A series without data gives None."""
        self.assertIsNone(plotting.carpet(pd.Series(index=list('abc'))))

class BoxplotTest(unittest.TestCase):
    """Smoke tests for plotting.boxplot: they only check that no exception is raised."""

    @staticmethod
    def _random_frame():
        """Build a daily-indexed frame with 20 columns of random integers below 5."""
        import numpy as np
        import pandas as pd
        days = pd.date_range('2015-1-1', '2015-2-1', freq='d')
        values = np.random.randint(5, size=(len(days), 20))
        return pd.DataFrame(index=days, data=values)

    def test_default(self):
        """Call boxplot with only the dataframe."""
        from opengrid.library import plotting
        frame = self._random_frame()
        plotting.boxplot(frame)

    def test_arguments(self):
        """Call boxplot with every optional argument set."""
        from opengrid.library import plotting
        frame = self._random_frame()
        plotting.boxplot(
            frame,
            plot_mean=True,
            plot_ids=[2, 3],
            title="Title",
            xlabel="xlable",
            ylabel="ylable",
        )


if __name__ == '__main__':
unittest.main()