Skip to content

Commit

Permalink
further improving code re-use with align_two_meters function
Browse files Browse the repository at this point in the history
  • Loading branch information
JackKelly committed Jul 10, 2014
1 parent 33f8f8e commit 720e035
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 203 deletions.
39 changes: 2 additions & 37 deletions nilmtk/elecmeter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from .datastore import Key
from .measurement import select_best_ac_type
from .node import Node
from .elecmeterandmetergroup import ElecMeterAndMeterGroup
from .electric import Electric

ElecMeterID = namedtuple('ElecMeterID', ['instance', 'building', 'dataset'])

class ElecMeter(Hashable, ElecMeterAndMeterGroup):
class ElecMeter(Hashable, Electric):
"""Represents a physical electricity meter.
Attributes
Expand Down Expand Up @@ -415,38 +415,3 @@ def clean_and_export(self, destination_datastore):
cleaning steps have been executed and some summary results (e.g. the number of
implausible values removed)"""
raise NotImplementedError


def diff_between_two_meters(master, slave):
    """Yield, chunk by chunk, `master.power_series() - slave.power_series()`.

    Takes the sample period and good sections of `master` and applies them
    to `slave`: for every chunk of `master`, the matching time frame is
    loaded from `slave`, both chunks are resampled to `master`'s sample
    period and then subtracted.

    Parameters
    ----------
    master, slave : ElecMeter or MeterGroup instances

    Returns
    -------
    generator of 2-tuple: (`diff`, `sum_of_slave_power`).
        `diff` is a pd.Series (master minus slave, with NaNs dropped) and
        `sum_of_slave_power` is a float (the sum of the resampled slave
        chunk).  Master chunks for which `slave` yields no data at all
        are skipped.
    """
    sample_period = master.sample_period()
    period_alias = '{:d}S'.format(sample_period)

    # TODO: preprocessing=[Resample(sample_period)])
    sections = master.good_sections()
    for master_chunk in master.power_series(periods=sections):
        # The chunksize is presumably large enough for the whole time frame
        # to arrive as a single chunk -- TODO confirm.
        slave_generator = slave.power_series(
            periods=[master_chunk.timeframe], chunksize=1E9)

        # A bare next() would let StopIteration escape from this generator
        # when the slave has nothing for this time frame.  That silently
        # ends the whole iteration on older interpreters and is turned
        # into a RuntimeError by PEP 479, so use a default and skip instead.
        slave_chunk = next(slave_generator, None)
        if slave_chunk is None:
            continue

        # TODO: do this resampling in the pipeline?
        slave_chunk = slave_chunk.resample(period_alias)
        master_chunk = master_chunk.resample(period_alias)

        diff = (master_chunk - slave_chunk).dropna()
        sum_of_slave_power = slave_chunk.sum()
        yield diff, sum_of_slave_power
31 changes: 30 additions & 1 deletion nilmtk/elecmeterandmetergroup.py → nilmtk/electric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
class ElecMeterAndMeterGroup(object):
import pandas as pd

class Electric(object):
"""Common implementations of methods shared by ElecMeter and MeterGroup.
"""

Expand Down Expand Up @@ -32,3 +34,30 @@ def min_on_power_threshold(self):
return min(
[appl.metadata.get('on_power_threshold', DEFAULT_ON_POWER_THRESHOLD)
for appl in self.appliances])


def align_two_meters(master, slave, func='power_series'):
    """Returns a generator of 2-column pd.DataFrames.  The first column
    ('master') is from `master`, the second ('slave') from `slave`.

    Takes the sample period and good sections of `master` and applies them
    to `slave`: for every chunk produced by `master`, the matching time
    frame is loaded from `slave` and both chunks are resampled to
    `master`'s sample period.

    Parameters
    ----------
    master, slave : ElecMeter or MeterGroup instances
    func : str, optional
        Name of the method looked up (with `getattr`) on both `master` and
        `slave` to produce the chunks.  It is called with a `periods`
        keyword argument (and additionally `chunksize` for `slave`).
        Defaults to 'power_series'.

    Returns
    -------
    generator of pd.DataFrame
        One DataFrame per chunk of `master`, with columns 'master' and
        'slave' in that order.  Master chunks for which `slave` yields no
        data at all are skipped.
    """
    sample_period = master.sample_period()
    period_alias = '{:d}S'.format(sample_period)
    sections = master.good_sections()
    master_generator = getattr(master, func)(periods=sections)
    for master_chunk in master_generator:
        # The chunksize is presumably large enough for the whole time frame
        # to arrive as a single chunk -- TODO confirm.
        slave_generator = getattr(slave, func)(
            periods=[master_chunk.timeframe], chunksize=1E9)

        # A bare next() would let StopIteration escape from this generator
        # when the slave has nothing for this time frame.  That silently
        # ends the whole iteration on older interpreters and is turned
        # into a RuntimeError by PEP 479, so use a default and skip instead.
        slave_chunk = next(slave_generator, None)
        if slave_chunk is None:
            continue

        # TODO: do this resampling in the pipeline?
        slave_chunk = slave_chunk.resample(period_alias)
        master_chunk = master_chunk.resample(period_alias)

        # Callers index the columns by position, so state the order
        # explicitly rather than relying on how dict keys get ordered.
        yield pd.DataFrame({'master': master_chunk, 'slave': slave_chunk},
                           columns=['master', 'slave'])

4 changes: 2 additions & 2 deletions nilmtk/metergroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from .utils import (tree_root, nodes_adjacent_to_root, simplest_type_for,
flatten_2d_list)
from .measurement import select_best_ac_type, AC_TYPES
from .elecmeterandmetergroup import ElecMeterAndMeterGroup
from .electric import Electric

class MeterGroup(ElecMeterAndMeterGroup):
class MeterGroup(Electric):
"""A group of ElecMeter objects. Can contain nested MeterGroup objects.
Implements many of the same methods as ElecMeter.
Expand Down
89 changes: 35 additions & 54 deletions nilmtk/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import pandas as pd
import math
from .metergroup import MeterGroup, iterate_through_submeters_of_two_metergroups
from .elecmeter import diff_between_two_meters
from .electric import align_two_meters

def error_in_assigned_energy(predictions, ground_truth):
"""Compute error in assigned energy.
Expand Down Expand Up @@ -124,10 +124,10 @@ def mean_normalized_error_power(predictions, ground_truth):
for pred_meter, ground_truth_meter in both_sets_of_meters:
total_abs_diff = 0.0
sum_of_ground_truth_power = 0.0
diff_generator = diff_between_two_meters(pred_meter, ground_truth_meter)
for diff, sum_gnd_truth_power_for_chunk in diff_generator:
for aligned_meters in align_two_meters(pred_meter, ground_truth_meter):
diff = (aligned_meters.icol(0) - aligned_meters.icol(1)).dropna()
total_abs_diff += sum(abs(diff))
sum_of_ground_truth_power += sum_gnd_truth_power_for_chunk
sum_of_ground_truth_power += aligned_meters.icol(1).sum()

mne[pred_meter.instance()] = total_abs_diff / sum_of_ground_truth_power

Expand Down Expand Up @@ -158,39 +158,55 @@ def rms_error_power(predictions, ground_truth):
for pred_meter, ground_truth_meter in both_sets_of_meters:
sum_of_squared_diff = 0.0
n_samples = 0
diff_generator = diff_between_two_meters(pred_meter, ground_truth_meter)
for diff, _ in diff_generator:
for aligned_meters in align_two_meters(pred_meter, ground_truth_meter):
diff = (aligned_meters.icol(0) - aligned_meters.icol(1)).dropna()
sum_of_squared_diff += (diff ** 2).sum()
n_samples += len(diff)

error[pred_meter.instance()] = math.sqrt(sum_of_squared_diff / n_samples)

return pd.Series(error)

########## FUNCTIONS BELOW THIS LINE HAVE NOT YET BEEN CONVERTED TO NILMTK v0.2 #####


def powers_to_states(powers):
'''Converts power demands into binary states
def f_score(predictions, ground_truth):
'''Compute F1 scores.
.. math::
F_{score}^{(n)} = \\frac
{2 * Precision * Recall}
{Precision + Recall}
Parameters
----------
powers: Pandas DataFrame of type {appliance :
[array of power]}
predictions, ground_truth : nilmtk.MeterGroup
Returns
-------
states: Pandas DataFrame of type {appliance :
[array of states]}
f1_scores : pd.Series
Each index is an meter instance int (or tuple for MeterGroups).
Each value is the F1 score for that appliance.
'''
from sklearn.metrics import f1_score

on_power_threshold = 50
threshold = 30
predicted_states = (predicted_power > threshold).astype(int)
ground_truth_states = (ground_truth_power > threshold).astype(int)
f1_scores = {}

states = pd.DataFrame(np.zeros(power.shape))
states[power > on_power_threshold] = 1
both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
predictions, ground_truth)
# for pred_meter, ground_truth_meter in both_sets_of_meters:
# f1_scores[pred_meter.instance()] = pass

for appliance in predicted_states.columns:
f1_scores[appliance] = f1_score(
ground_truth_states[[appliance]], predicted_states[[appliance]])
return pd.Series(f1_scores)


########## FUNCTIONS BELOW THIS LINE HAVE NOT YET BEEN CONVERTED TO NILMTK v0.2 #####

return states

"""
def confusion_matrices(predicted_states, ground_truth_states):
Expand Down Expand Up @@ -333,41 +349,6 @@ def precision_recall(predicted_states, ground_truth_states):
return np.array([prec, rec])
def f_score(predicted_power, ground_truth_power):
    '''Compute the F1 score of every appliance column.

    .. math::
        F_{score}^{(n)} = \\frac
            {2 * Precision * Recall}
            {Precision + Recall}

    Both inputs are first turned into binary on/off states by comparing
    them against a fixed threshold of 30 (presumably watts -- TODO
    confirm); the states are then scored with sklearn's `f1_score`.

    Parameters
    ----------
    predicted_power : Pandas DataFrame of type {appliance :
         [array of predicted power]}
    ground_truth_power : Pandas DataFrame of type {appliance :
         [array of ground truth power]}

    Returns
    -------
    dict mapping each column of `predicted_power` to its F1 score
    '''
    from sklearn.metrics import f1_score

    on_threshold = 30

    def as_states(power):
        # 1 where the reading is above the threshold, 0 elsewhere.
        return (power > on_threshold).astype(int)

    predicted = as_states(predicted_power)
    actual = as_states(ground_truth_power)

    # The columns of the prediction decide which appliances get scored.
    return {name: f1_score(actual[[name]], predicted[[name]])
            for name in predicted.columns}
def hamming_loss(predicted_state, ground_truth_state):
Expand Down
66 changes: 52 additions & 14 deletions notebooks/test_v0_2_metrics.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:2ae286c4bd340ef4d8cea0ef226554692cb042537aa7b481a607b8f8d16f721a"
"signature": "sha256:03e4b64c7582d6058a6ae9c2ffb2f7b02a058f5df866b274c2e8daf5d16d4eb9"
},
"nbformat": 3,
"nbformat_minor": 0,
Expand Down Expand Up @@ -252,20 +252,31 @@
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "ElecMeterID(instance=(10, 20), building=1, dataset='REDD')",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-11-3c48e5ec6b4a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmne\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean_normalized_error_power\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredictions_metergroup\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mground_truth_metergroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mmne\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metrics.pyc\u001b[0m in \u001b[0;36mmean_normalized_error_power\u001b[1;34m(predictions, ground_truth)\u001b[0m\n\u001b[0;32m 121\u001b[0m \u001b[0mmne\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 122\u001b[0m both_sets_of_meters = iterate_through_submeters_of_two_metergroups(\n\u001b[1;32m--> 123\u001b[1;33m predictions, ground_truth)\n\u001b[0m\u001b[0;32m 124\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mpred_meter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mground_truth_meter\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mboth_sets_of_meters\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 125\u001b[0m \u001b[0mtotal_abs_diff\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metergroup.py\u001b[0m in \u001b[0;36miterate_through_submeters_of_two_metergroups\u001b[1;34m(master, slave)\u001b[0m\n\u001b[0;32m 674\u001b[0m slave_identifier = master_meter.identifier._replace(\n\u001b[0;32m 675\u001b[0m dataset=slave.dataset())\n\u001b[1;32m--> 676\u001b[1;33m \u001b[0mslave_meter\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mslave\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mslave_identifier\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 677\u001b[0m \u001b[0mzipped\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaster_meter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mslave_meter\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 678\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mzipped\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metergroup.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 140\u001b[0m group.dataset() == key.dataset):\n\u001b[0;32m 141\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgroup\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# find MeterGroup from list of ElecMeterIDs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: ElecMeterID(instance=(10, 20), building=1, dataset='REDD')"
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"5 0.496294\n",
"6 0.889508\n",
"7 0.251467\n",
"8 0.314228\n",
"9 0.420332\n",
"11 0.862761\n",
"12 3.573414\n",
"13 31.760268\n",
"14 20.618227\n",
"15 2.130499\n",
"16 4.078660\n",
"17 0.893244\n",
"18 0.963919\n",
"19 0.387947\n",
"(3, 4) 1.384429\n",
"(10, 20) 0.258725\n",
"dtype: float64"
]
}
],
"prompt_number": 11
"prompt_number": 10
},
{
"cell_type": "code",
Expand All @@ -276,15 +287,42 @@
],
"language": "python",
"metadata": {},
"outputs": []
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"5 90.771657\n",
"6 158.747636\n",
"7 11.944890\n",
"8 23.672594\n",
"9 43.961804\n",
"11 148.995636\n",
"12 131.823771\n",
"13 7.964562\n",
"14 86.237925\n",
"15 102.127704\n",
"16 131.342569\n",
"17 42.976850\n",
"18 31.552245\n",
"19 0.007743\n",
"(3, 4) 195.778450\n",
"(10, 20) 130.869652\n",
"dtype: float64"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
"outputs": [],
"prompt_number": 11
}
],
"metadata": {}
Expand Down
157 changes: 62 additions & 95 deletions notebooks/testing_nilmtk_V0.2.ipynb

Large diffs are not rendered by default.

0 comments on commit 720e035

Please sign in to comment.