-
Notifications
You must be signed in to change notification settings - Fork 455
/
dataset.py
207 lines (173 loc) · 6.91 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
from __future__ import print_function, division
import os
from collections import OrderedDict
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from six import iteritems
from .building import Building
from .datastore.datastore import join_key
from .utils import get_datastore
from .timeframe import TimeFrame
class DataSet(object):
    """A set of buildings backed by a single data store.

    Attributes
    ----------
    buildings : OrderedDict
        Each key is an integer, starting from 1.
        Each value is a nilmtk.Building object.

    store : nilmtk.DataStore

    metadata : dict
        Metadata describing the dataset name, authors etc.
        (Metadata about specific buildings, meters, appliances etc.
        is stored elsewhere.)
        See nilm-metadata.readthedocs.org/en/latest/dataset_metadata.html#dataset
    """

    def __init__(self, filename=None, format='HDF'):
        """
        Parameters
        ----------
        filename : str
            path to data set.  If None then an empty DataSet is created
            and no store is opened.

        format : str
            format of the data store, passed to `get_datastore`.
            Either 'HDF' or 'CSV'. Defaults to 'HDF'
        """
        self.store = None
        self.buildings = OrderedDict()
        self.metadata = {}
        if filename is not None:
            self.import_metadata(get_datastore(filename, format))

    def import_metadata(self, store):
        """Attach `store` to this dataset and load its metadata and buildings.

        Parameters
        ----------
        store : nilmtk.DataStore

        Returns
        -------
        self : DataSet
            Returned to allow call chaining.
        """
        self.store = store
        self.metadata = store.load_metadata()
        self._init_buildings(store)
        return self

    def save(self, destination):
        """Save every building to `destination` under '/building<id>'.

        Parameters
        ----------
        destination : object accepted by `Building.save`
        """
        for b_id, building in self.buildings.items():
            building.save(destination, '/building' + str(b_id))

    def _init_buildings(self, store):
        """Create one Building per element found below '/' in `store`.

        Buildings are inserted into `self.buildings` keyed by
        `building.identifier.instance`.
        """
        # NOTE(review): keys are sorted as returned by the store; if they are
        # strings such as 'building10' the order is lexicographic, not
        # numeric -- confirm this is the intended insertion order.
        building_keys = sorted(store.elements_below_key('/'))
        dataset_name = self.metadata.get('name')
        for b_key in building_keys:
            building = Building()
            building.import_metadata(store, '/' + b_key, dataset_name)
            self.buildings[building.identifier.instance] = building

    def set_window(self, start=None, end=None):
        """Set the timeframe window on self.store. Used for setting the
        'region of interest' non-destructively for all processing.

        Parameters
        ----------
        start, end : str or pd.Timestamp or datetime or None

        Raises
        ------
        RuntimeError
            If no store is attached, or if the dataset metadata has no
            'timezone' entry.
        """
        if self.store is None:
            raise RuntimeError("You need to set self.store first!")

        tz = self.metadata.get('timezone')
        if tz is None:
            raise RuntimeError("'timezone' is not set in dataset metadata.")

        self.store.window = TimeFrame(start, end, tz)

    def describe(self, **kwargs):
        """Returns a DataFrame describing this dataset.
        Each column is a building.  Each row is a feature.

        Parameters
        ----------
        **kwargs
            Passed through to each building's `describe`.
        """
        # `dict.keys()` is a view on Python 3 and has no `.sort()`;
        # `sorted` works on both Python 2 and 3.
        keys = sorted(self.buildings)
        results = pd.DataFrame(columns=keys)
        for i, building in self.buildings.items():
            results[i] = building.describe(**kwargs)
        return results

    def plot_good_sections(self, axes=None, label_func=None, gap=0, **kwargs):
        """Plots all good sections for all buildings.

        Parameters
        ----------
        axes : list of axes or None.
            If None then they will be generated.
        label_func, gap, **kwargs
            Passed through to each building's `elec.plot_good_sections`.

        Returns
        -------
        axes : list of axes
        """
        n = len(self.buildings)
        elecs = self.elecs()
        if axes is None:
            # One row per building, row height proportional to meter count.
            n_meters_per_building = [len(elec.all_meters()) for elec in elecs]
            gridspec_kw = dict(height_ratios=n_meters_per_building)
            # squeeze=False always yields a 2-D array, so a dataset with a
            # single building still produces a sequence of axes.
            _, axes = plt.subplots(
                n, 1, sharex=True, squeeze=False, gridspec_kw=gridspec_kw)
            axes = axes[:, 0]
        assert n == len(axes)

        ax = None
        for i, (ax, elec) in enumerate(zip(axes, elecs)):
            elec.plot_good_sections(ax=ax, label_func=label_func, gap=gap,
                                    **kwargs)
            ax.set_title('House {}'.format(elec.building()), y=0.4, va='top')
            ax.grid(False)
            for spine in ax.spines.values():
                spine.set_linewidth(0.5)
            if i == n // 2:
                # Single y-label on the middle subplot.
                ax.set_ylabel('Meter', rotation=0,
                              ha='center', va='center', y=.4)

        # Label the x-axis on the last (bottom) subplot only, if any.
        if ax is not None:
            ax.set_xlabel('Date')

        plt.tight_layout()
        plt.subplots_adjust(hspace=0.05)
        plt.draw()

        return axes

    def elecs(self):
        """Return a list holding the `elec` attribute of every building."""
        return [building.elec for building in self.buildings.values()]

    def clear_cache(self):
        """Call `clear_cache` on the `elec` of every building."""
        for elec in self.elecs():
            elec.clear_cache()

    def plot_mains_power_histograms(self, axes=None, **kwargs):
        """Plot a mains power histogram for each building.

        Parameters
        ----------
        axes : list of axes or None.
            If None then they will be generated.
        **kwargs
            Passed through to `plot_power_histogram`.

        Returns
        -------
        axes : list of axes
        """
        n = len(self.buildings)
        if axes is None:
            # squeeze=False keeps `axes` a sequence even for one building.
            _, axes = plt.subplots(n, 1, sharex=True, squeeze=False)
            axes = axes[:, 0]
        assert n == len(axes)

        for ax, elec in zip(axes, self.elecs()):
            ax = elec.mains().plot_power_histogram(ax=ax, **kwargs)
            ax.set_title('House {}'.format(elec.building()))
        return axes

    def get_activity_script(self, filename):
        """Extracts an activity script from this dataset.

        Saves the activity script to an HDF5 file.
        Keys in the HDF5 file take the form:
        '/building<building_i>/<appliance type>__<appliance instance>'
        e.g. '/building1/electric_oven__1'
        Spaces in the appliance type are replaced by underscores.

        Each table is of fixed format and stores a pd.Series.
        The index is the datetime of the start time or end time of
        each appliance activation.  The values are booleans.  True means
        the start time of an appliance activation; false means the
        end time of an appliance activation.

        Parameters
        ----------
        filename : str
            The full filename, including path and suffix, for the HDF5 file
            for storing the activity script.
        """
        store = pd.HDFStore(
            filename, mode='w', complevel=9, complib='blosc')

        # Close the file even if computing activations fails part-way.
        try:
            for building in self.buildings.values():
                submeters = building.elec.submeters().meters

                for meter in submeters:
                    appliance = meter.dominant_appliance()
                    key = '/building{:d}/{:s}__{:d}'.format(
                        building.identifier.instance,
                        appliance.identifier.type.replace(' ', '_'),
                        appliance.identifier.instance)
                    print("Computing activations for", key)

                    activations = meter.get_activations()
                    starts = []
                    ends = []
                    for activation in activations:
                        starts.append(activation.index[0])
                        ends.append(activation.index[-1])
                    # Drop the activations before building the script.
                    del activations

                    starts = pd.Series(True, index=starts)
                    ends = pd.Series(False, index=ends)
                    script = pd.concat([starts, ends])
                    script = script.sort_index()
                    store[key] = script
                    del starts, ends
        finally:
            store.close()