-
Notifications
You must be signed in to change notification settings - Fork 455
/
utils.py
204 lines (158 loc) · 5.91 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
from __future__ import print_function, division
import os, copy, sys
import numpy as np
import pandas as pd
from scipy import stats
def get_immediate_subdirectories(dir):
    """Return the names of the directories located directly inside `dir`.

    Parameters
    ----------
    dir : str or None
        Path of the directory to scan.  A falsy value (e.g. `None` or an
        empty string) is not scanned.

    Returns
    -------
    list of str
        Bare entry names (not full paths) as produced by `os.listdir`;
        an empty list when `dir` is falsy.
    """
    # Adapted from Richie Hindle's StackOverflow answer:
    # http://stackoverflow.com/a/800201/732596
    if not dir:
        return []
    return [entry for entry in os.listdir(dir)
            if os.path.isdir(os.path.join(dir, entry))]
def find_nearest_index(array, value):
    """Return the index of the element of `array` closest to `value`.

    Parameters
    ----------
    array : np.ndarray
    value : scalar

    Returns
    -------
    int
        Position of the smallest absolute difference `|array - value|`.
    """
    distances = np.abs(array - value)
    return distances.argmin()
def find_nearest(array, value):
    """Locate the element of `array` closest to `value`.

    Parameters
    ----------
    array : np.ndarray
    value : scalar

    Returns
    -------
    list : [index, residual]
        `index` is the position of the closest element and `residual` is
        the negated difference `array[index] - value`.
    """
    nearest = np.abs(array - value).argmin()
    offset = array[nearest] - value
    return [nearest, -offset]
def find_nearest_non_vectorized(known_array, test_array):
    """Find, one element at a time, the nearest `known_array` entry for
    every element of `test_array`.

    This is the plain-loop counterpart of `find_nearest_vectorized`; each
    iteration performs the same computation as `find_nearest`.

    Parameters
    ----------
    known_array : np.ndarray
        Values to search in.
    test_array : sequence of scalars
        Values to look up.

    Returns
    -------
    list : [indices, residual]
        `indices` is an integer array holding, for each test value, the
        position of the closest element of `known_array`.  `residual` is a
        float array of the negated differences
        `known_array[index] - test value`.
    """
    n_tests = len(test_array)
    # Integer dtype so the result can be used directly to index arrays.
    indices = np.zeros(n_tests, dtype=int)
    residual = np.zeros(n_tests)
    # `enumerate` replaces the Python-2-only `xrange(len(...))` loop.
    for i, value in enumerate(test_array):
        nearest = np.abs(known_array - value).argmin()
        indices[i] = nearest
        residual[i] = -(known_array[nearest] - value)
    return [indices, residual]
def find_nearest_vectorized(known_array, test_array):
    """Find the nearest `known_array` entry for every element of
    `test_array` using a single broadcast subtraction.

    Parameters
    ----------
    known_array : array-like
        Values to search in.
    test_array : array-like
        Values to look up.

    Returns
    -------
    list : [indices, residual]
        `indices[j]` is the position in (flattened) `known_array` of the
        element closest to `test_array[j]`; `residual[j]` is
        `test_array[j] - known_array[indices[j]]`.

    Notes
    -----
    A `len(known_array) x len(test_array)` difference matrix is built, so
    memory use grows with the product of the two lengths.
    """
    # Recipe borrowed from
    # http://stackoverflow.com/questions/20780017/numpy-vectorize-finding-closest-value-in-an-array-for-each-element-in-another-a
    known = np.asarray(known_array)
    test = np.asarray(test_array)
    # differences[i, j] = test[j] - known[i]
    differences = test.reshape(1, -1) - known.reshape(-1, 1)
    indices = np.abs(differences).argmin(axis=0)
    # Pick differences[indices[j], j] directly; selecting whole rows and
    # taking the diagonal would allocate a len(test) x len(test) temporary.
    residual = differences[indices, np.arange(indices.size)]
    return [indices, residual]
def find_nearest_searchsorted(known_array, test_array):
    """Find the nearest `known_array` entry for every element of
    `test_array` using a binary search on a sorted copy.

    Parameters
    ----------
    known_array : array-like
        Values to search in; need not be sorted.  Must be non-empty.
    test_array : array-like
        Values to look up.

    Returns
    -------
    list : [indices, residual]
        `indices` are positions in the original (unsorted) `known_array`
        of the closest element for each test value; `residual` is
        `test_array - known_array[indices]`.  When a test value is
        equidistant from its two neighbours the larger neighbour is chosen.
    """
    known = np.asarray(known_array)
    test = np.asarray(test_array)

    index_sorted = np.argsort(known)
    known_sorted = known[index_sorted]
    last = len(known_sorted) - 1

    insert_at = np.searchsorted(known_sorted, test)
    # `searchsorted` returns len(known_sorted) for values above the maximum;
    # clip so the right-hand candidate is always a valid position.
    right = np.clip(insert_at, 0, last)
    left = np.clip(insert_at - 1, 0, last)

    diff_right = known_sorted[right] - test
    diff_left = test - known_sorted[left]
    nearest_sorted = np.where(diff_right <= diff_left, right, left)

    # Map positions in the sorted copy back to the caller's ordering.
    indices = index_sorted[nearest_sorted]
    residual = test - known[indices]
    return [indices, residual]
def secs_per_period_alias(alias):
    """Return the number of seconds spanned by one Pandas period alias.

    Parameters
    ----------
    alias : str
        A Pandas frequency alias accepted by `pd.date_range(freq=...)`.

    Returns
    -------
    float
        Seconds between two consecutive timestamps generated at `alias`.
    """
    first, second = pd.date_range('00:00', periods=2, freq=alias)
    return (second - first).total_seconds()
def is_namedtuple(obj, nt):
    """Return True if `obj` exposes every field of the namedtuple type `nt`.

    This is a duck-typed check: `obj` qualifies when it has an attribute
    for each name in `nt._fields`, regardless of its concrete class.

    Parameters
    ----------
    obj : object
        The object to test.
    nt : type
        A namedtuple class (anything with a `_fields` attribute).

    Returns
    -------
    bool
        False when `nt` has no `_fields` or `obj` lacks any of the fields.
    """
    # we can't use isinstance on NamedTuples like isinstance(col_name, DualSupply)
    # see http://bugs.python.org/issue7796
    try:
        fields = nt._fields
    except AttributeError:
        return False
    # Use attribute lookup rather than `obj.__dict__`: namedtuple instances
    # are slotted and have no instance `__dict__` on current Python versions.
    return all(hasattr(obj, field) for field in fields)
def recursive_resolve(obj, dict_name):
    """Look up a dotted attribute path on `obj` through instance `__dict__`s.

    Parameters
    ----------
    obj : object
        Starting object; every object along the path must have a
        `__dict__`.
    dict_name : string
        Attribute path whose components are separated by periods,
        e.g. 'utility.electric.appliances'.

    Returns
    -------
    The value found at `obj.<dict_name>`, or `None` when the path (or the
    remainder of the path at any step) starts with an empty component.

    Raises
    ------
    KeyError
        If a component is missing from the corresponding `__dict__`.

    Examples
    --------
    To get building.utility.electric.appliances:

    >>> appliances = recursive_resolve(building, 'utility.electric.appliances')
    """
    current = obj
    remaining = dict_name
    while True:
        head, _, tail = remaining.partition('.')
        if not head:
            return None
        if not tail:
            # Last component, e.g. partition gave ('electric', '', '')
            return current.__dict__[remaining]
        current = current.__dict__[head]
        remaining = tail
def apply_func_to_values_of_dicts(obj, func, dict_names):
    """Apply a generic function `func` to all values of a set of dicts,
    each of which is an attribute of an arbitrary object `obj`.

    `obj` itself is never modified; the work is done on a deep copy.

    Parameters
    ----------
    obj : object
        any object which has one or more dicts as attributes
    func : function
        the function to apply to each dict value
    dict_names : list of strings
        the attribute names of the dicts in `obj`; each name is resolved
        with `recursive_resolve`, so dotted paths are accepted

    Returns
    -------
    obj_copy : a deepcopy of `obj` with `func` applied to all `obj.<dict_names>`

    Raises
    ------
    Any exception raised by `func` is re-raised after a diagnostic line
    naming the attribute and key has been written to stderr.

    Examples
    --------
    For example, to apply `resample` to the `circuits` and `mains` dicts of
    an Electricity object:

    >>> resample = lambda df : pd.DataFrame.resample(df, rule='T')
    >>> electric = apply_func_to_values_of_dicts(electric,
    ...                                          resample,
    ...                                          ['circuits', 'mains'])
    """
    # TODO: a lot of functions in nilmtk.preprocessing.electricity.building
    # could be simplified using `apply_func_to_values_of_dicts`
    obj_copy = copy.deepcopy(obj)
    for attribute in dict_names:
        dict_ = recursive_resolve(obj_copy, attribute)
        # `items()` works on both Python 2 and 3 (`iteritems` is Python 2
        # only); the list snapshot keeps iteration independent of the
        # assignments made to `dict_` inside the loop.
        for key, value in list(dict_.items()):
            try:
                dict_[key] = func(value)
            except Exception:
                print("Exception occurred while processing attribute={}, key={}"
                      .format(attribute, key), file=sys.stderr)
                raise
    return obj_copy
def timedelta64_to_secs(timedelta):
    """Express `timedelta` as a number of seconds.

    Parameters
    ----------
    timedelta : np.timedelta64

    Returns
    -------
    float : seconds
    """
    one_second = np.timedelta64(1, 's')
    return timedelta / one_second
def summary_stats_string(data, fmt='{:>6.2f}', sep='\n', stat_strings=None,
                         minimal=False):
    """Format a set of summary statistics of `data` as a single string.

    Parameters
    ----------
    data : array-like
        Values to summarise; converted with `np.array`.
    fmt : str
        `str.format` template applied to each statistic.
    sep : str
        Separator placed between consecutive statistics.
    stat_strings : list of str, optional
        Names of the statistics to report.  'mode' is taken from
        `scipy.stats`, 'median' from `numpy`, and any other name is called
        as a method of the data array.  Defaults to
        ['min', 'mean', 'mode', 'max', 'std'].
    minimal : bool
        If True, omit the ' name =' label in front of each value.

    Returns
    -------
    str
        One entry per statistic, joined by `sep`.  A statistic whose
        computation raises ValueError (e.g. `min` of empty data) or yields
        no value is rendered as 'NA'.
    """
    data = np.array(data)
    if stat_strings is None:
        stat_strings = ['min', 'mean', 'mode', 'max', 'std']

    scipy_stats = ['mode']
    numpy_stats = ['median']  # stats which aren't methods of np.ndarray

    entries = []
    for stat_str in stat_strings:
        label = '' if minimal else ' {:5s}'.format(stat_str) + '='
        try:
            if stat_str in scipy_stats:
                result = getattr(stats, stat_str)(data)
                # Depending on the SciPy version the mode comes back as a
                # scalar or as an array; flatten so both are handled.
                stat = np.ravel(result[0])[0]
            elif stat_str in numpy_stats:
                stat = getattr(np, stat_str)(data)
            else:
                stat = getattr(data, stat_str)()
        except (ValueError, IndexError):
            value = 'NA'
        else:
            value = fmt.format(stat)
        entries.append(label + value)

    # Joining guarantees exactly one separator between entries, even when
    # the same statistic name appears more than once.
    return sep.join(entries)