further improving code re-use with align_two_meters function

nilmtk · Jul 10, 2014 · 720e035 · 720e035
1 parent 33f8f8e
commit 720e035
Show file tree

Hide file tree

Showing 6 changed files with 183 additions and 203 deletions.
diff --git a/nilmtk/elecmeter.py b/nilmtk/elecmeter.py
@@ -9,11 +9,11 @@
 from .datastore import Key
 from .measurement import select_best_ac_type
 from .node import Node
-from .elecmeterandmetergroup import ElecMeterAndMeterGroup
+from .electric import Electric
 
 ElecMeterID = namedtuple('ElecMeterID', ['instance', 'building', 'dataset'])
 
-class ElecMeter(Hashable, ElecMeterAndMeterGroup):
+class ElecMeter(Hashable, Electric):
     """Represents a physical electricity meter.
     
     Attributes
@@ -415,38 +415,3 @@ def clean_and_export(self, destination_datastore):
         cleaning steps have been executed and some summary results (e.g. the number of
         implausible values removed)"""
         raise NotImplementedError
-
-
-def diff_between_two_meters(master, slave):
-    """Returns a generator of pd.Series of 
-    master.power_series() - slave.power_series()
-
-    Takes the sample rate and good_periods of `master` and applies to `slave.
-
-    Parameters
-    ----------
-    master, slave : ElecMeter or MeterGroup instances
-
-    Returns
-    -------
-    generator of 2-tuple: (`diff`, `sum_of_slave_power`).  `diff` is a pd.Series and 
-    `sum_of_slave_power` is a float.
-    """
-    sample_period = master.sample_period()
-    period_alias = '{:d}S'.format(sample_period)
-
-    # TODO: preprocessing=[Resample(sample_period)])
-    sections = master.good_sections()
-    master_generator = master.power_series(periods=sections)
-    for master_chunk in master_generator:
-        slave_generator = slave.power_series(periods=[master_chunk.timeframe], 
-                                             chunksize=1E9)
-        slave_chunk = next(slave_generator)
-
-        # TODO: do this resampling in the pipeline?
-        slave_chunk = slave_chunk.resample(period_alias)
-        master_chunk = master_chunk.resample(period_alias)
-
-        diff = (master_chunk - slave_chunk).dropna()
-        sum_of_slave_power = slave_chunk.sum()
-        yield diff, sum_of_slave_power
diff --git a/nilmtk/elecmeterandmetergroup.py → nilmtk/electric.py b/nilmtk/elecmeterandmetergroup.py → nilmtk/electric.py
@@ -1,4 +1,6 @@
-class ElecMeterAndMeterGroup(object):
+import pandas as pd
+
+class Electric(object):
     """Common implementations of methods shared by ElecMeter and MeterGroup.
     """
 
@@ -32,3 +34,30 @@ def min_on_power_threshold(self):
         return min(
             [appl.metadata.get('on_power_threshold', DEFAULT_ON_POWER_THRESHOLD)
              for appl in self.appliances])
+
+
+def align_two_meters(master, slave, func='power_series'):
+    """Returns a generator of 2-column pd.DataFrames.  The first column is from
+    `master`, the second from `slave`.
+
+    Applies the sample period and good sections of `master` to `slave`.
+
+    Parameters
+    ----------
+    master, slave : ElecMeter or MeterGroup instances
+    """
+    sample_period = master.sample_period()
+    period_alias = '{:d}S'.format(sample_period)
+    sections = master.good_sections()
+    master_generator = getattr(master, func)(periods=sections)
+    for master_chunk in master_generator:
+        slave_generator = getattr(slave, func)(periods=[master_chunk.timeframe],
+                                               chunksize=1E9)
+        slave_chunk = next(slave_generator)
+
+        # TODO: do this resampling in the pipeline?
+        slave_chunk = slave_chunk.resample(period_alias)
+        master_chunk = master_chunk.resample(period_alias)
+
+        yield pd.DataFrame({'master': master_chunk, 'slave': slave_chunk})
+
diff --git a/nilmtk/metergroup.py b/nilmtk/metergroup.py
@@ -8,9 +8,9 @@
 from .utils import (tree_root, nodes_adjacent_to_root, simplest_type_for,
                     flatten_2d_list)
 from .measurement import select_best_ac_type, AC_TYPES
-from .elecmeterandmetergroup import ElecMeterAndMeterGroup
+from .electric import Electric
 
-class MeterGroup(ElecMeterAndMeterGroup):
+class MeterGroup(Electric):
     """A group of ElecMeter objects. Can contain nested MeterGroup objects.
 
     Implements many of the same methods as ElecMeter.

diff --git a/nilmtk/metrics.py b/nilmtk/metrics.py
@@ -30,7 +30,7 @@
 import pandas as pd
 import math
 from .metergroup import MeterGroup, iterate_through_submeters_of_two_metergroups
-from .elecmeter import diff_between_two_meters
+from .electric import align_two_meters
 
 def error_in_assigned_energy(predictions, ground_truth):
     """Compute error in assigned energy.
@@ -124,10 +124,10 @@ def mean_normalized_error_power(predictions, ground_truth):
     for pred_meter, ground_truth_meter in both_sets_of_meters:
         total_abs_diff = 0.0
         sum_of_ground_truth_power = 0.0
-        diff_generator = diff_between_two_meters(pred_meter, ground_truth_meter)
-        for diff, sum_gnd_truth_power_for_chunk in diff_generator:
+        for aligned_meters in align_two_meters(pred_meter, ground_truth_meter):
+            diff = (aligned_meters.icol(0) - aligned_meters.icol(1)).dropna()
             total_abs_diff += sum(abs(diff))
-            sum_of_ground_truth_power += sum_gnd_truth_power_for_chunk
+            sum_of_ground_truth_power += aligned_meters.icol(1).sum()
 
         mne[pred_meter.instance()] = total_abs_diff / sum_of_ground_truth_power
 
@@ -158,39 +158,55 @@ def rms_error_power(predictions, ground_truth):
     for pred_meter, ground_truth_meter in both_sets_of_meters:
         sum_of_squared_diff = 0.0
         n_samples = 0
-        diff_generator = diff_between_two_meters(pred_meter, ground_truth_meter)
-        for diff, _ in diff_generator:
+        for aligned_meters in align_two_meters(pred_meter, ground_truth_meter):
+            diff = (aligned_meters.icol(0) - aligned_meters.icol(1)).dropna()
             sum_of_squared_diff += (diff ** 2).sum()
             n_samples += len(diff)
 
         error[pred_meter.instance()] = math.sqrt(sum_of_squared_diff / n_samples)
 
     return pd.Series(error)
 
-########## FUNCTIONS BELOW THIS LINE HAVE NOT YET CONVERTED TO NILMTK v0.2 #####
-
 
-def powers_to_states(powers):
-    '''Converts power demands into binary states
+def f_score(predictions, ground_truth):
+    '''Compute F1 scores.
+    
+    .. math::
+        F_{score}^{(n)} = \\frac
+            {2 * Precision * Recall}
+            {Precision + Recall}
 
     Parameters
     ----------
-
-    powers: Pandas DataFrame of type {appliance :
-         [array of power]}
+    predictions, ground_truth : nilmtk.MeterGroup
 
     Returns
     -------
-    states: Pandas DataFrame of type {appliance :
-         [array of states]}
+    f1_scores : pd.Series
+        Each index is a meter instance int (or tuple for MeterGroups).
+        Each value is the F1 score for that appliance.
+
     '''
+    from sklearn.metrics import f1_score
 
-    on_power_threshold = 50
+    threshold = 30
+    predicted_states = (predicted_power > threshold).astype(int)
+    ground_truth_states = (ground_truth_power > threshold).astype(int)
+    f1_scores = {}
 
-    states = pd.DataFrame(np.zeros(power.shape))
-    states[power > on_power_threshold] = 1
+    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
+        predictions, ground_truth)
+    # for pred_meter, ground_truth_meter in both_sets_of_meters:
+    #     f1_scores[pred_meter.instance()] = pass
+
+    for appliance in predicted_states.columns:
+        f1_scores[appliance] = f1_score(
+            ground_truth_states[[appliance]], predicted_states[[appliance]])
+    return pd.Series(f1_scores)
+
+
+########## FUNCTIONS BELOW THIS LINE HAVE NOT YET BEEN CONVERTED TO NILMTK v0.2 #####
 
-    return states
 
 """
 def confusion_matrices(predicted_states, ground_truth_states):
@@ -333,41 +349,6 @@ def precision_recall(predicted_states, ground_truth_states):
     return np.array([prec, rec])
 
 
-def f_score(predicted_power, ground_truth_power):
-    '''Compute F1 score
-    
-    .. math::
-        F_score^{(n)} = \\frac
-            {2 * Precision * Recall}
-            {Precision + Recall}
-
-    Parameters
-    ----------
-
-    predicted_state: Pandas DataFrame of type {appliance :
-         [array of predicted states]}
-
-    ground_truth_state: Pandas DataFrame of type {appliance :
-        [array of ground truth states]}
-
-    Returns
-    -------
-    numpy array where columns represent appliances and rows represent F score
-    '''
-    from sklearn.metrics import f1_score
-    threshold = 30
-    predicted_states = (predicted_power > threshold).astype(int)
-    ground_truth_states = (ground_truth_power > threshold).astype(int)
-    f_score_out = {}
-    for appliance in predicted_states.columns:
-        f_score_out[appliance] = f1_score(
-            ground_truth_states[[appliance]], predicted_states[[appliance]])
-    return f_score_out
-
-    #prec_rec = precision_recall(predicted_states, ground_truth_states)
-    # return (2 * prec_rec[0, :] * prec_rec[1,:]) / (prec_rec[0,:] +
-    # prec_rec[1,:])
-    # return f1_score(ground_truth_states, predicted_states)
 
 
 def hamming_loss(predicted_state, ground_truth_state):

diff --git a/notebooks/test_v0_2_metrics.ipynb b/notebooks/test_v0_2_metrics.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:2ae286c4bd340ef4d8cea0ef226554692cb042537aa7b481a607b8f8d16f721a"
+  "signature": "sha256:03e4b64c7582d6058a6ae9c2ffb2f7b02a058f5df866b274c2e8daf5d16d4eb9"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -252,20 +252,31 @@
      "metadata": {},
      "outputs": [
       {
-       "ename": "KeyError",
-       "evalue": "ElecMeterID(instance=(10, 20), building=1, dataset='REDD')",
-       "output_type": "pyerr",
-       "traceback": [
-        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-        "\u001b[1;32m<ipython-input-11-3c48e5ec6b4a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmne\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean_normalized_error_power\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredictions_metergroup\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mground_truth_metergroup\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mmne\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-        "\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metrics.pyc\u001b[0m in \u001b[0;36mmean_normalized_error_power\u001b[1;34m(predictions, ground_truth)\u001b[0m\n\u001b[0;32m    121\u001b[0m     \u001b[0mmne\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    122\u001b[0m     both_sets_of_meters = iterate_through_submeters_of_two_metergroups(\n\u001b[1;32m--> 123\u001b[1;33m         predictions, ground_truth)\n\u001b[0m\u001b[0;32m    124\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mpred_meter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mground_truth_meter\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mboth_sets_of_meters\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    125\u001b[0m         \u001b[0mtotal_abs_diff\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-        "\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metergroup.py\u001b[0m in \u001b[0;36miterate_through_submeters_of_two_metergroups\u001b[1;34m(master, slave)\u001b[0m\n\u001b[0;32m    674\u001b[0m         slave_identifier = master_meter.identifier._replace(\n\u001b[0;32m    675\u001b[0m             dataset=slave.dataset())\n\u001b[1;32m--> 676\u001b[1;33m         \u001b[0mslave_meter\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mslave\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mslave_identifier\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    677\u001b[0m         \u001b[0mzipped\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaster_meter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mslave_meter\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    678\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mzipped\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-        "\u001b[1;32m/home/jack/workspace/python/nilmtk/nilmtk/metergroup.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m    140\u001b[0m                         group.dataset() == key.dataset):\n\u001b[0;32m    141\u001b[0m                         \u001b[1;32mreturn\u001b[0m \u001b[0mgroup\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m             \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    143\u001b[0m         \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# find MeterGroup from list of ElecMeterIDs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    144\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-        "\u001b[1;31mKeyError\u001b[0m: ElecMeterID(instance=(10, 20), building=1, dataset='REDD')"
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "5            0.496294\n",
+        "6            0.889508\n",
+        "7            0.251467\n",
+        "8            0.314228\n",
+        "9            0.420332\n",
+        "11           0.862761\n",
+        "12           3.573414\n",
+        "13          31.760268\n",
+        "14          20.618227\n",
+        "15           2.130499\n",
+        "16           4.078660\n",
+        "17           0.893244\n",
+        "18           0.963919\n",
+        "19           0.387947\n",
+        "(3, 4)       1.384429\n",
+        "(10, 20)     0.258725\n",
+        "dtype: float64"
        ]
       }
      ],
-     "prompt_number": 11
+     "prompt_number": 10
     },
     {
      "cell_type": "code",
@@ -276,15 +287,42 @@
      ],
      "language": "python",
      "metadata": {},
-     "outputs": []
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "5            90.771657\n",
+        "6           158.747636\n",
+        "7            11.944890\n",
+        "8            23.672594\n",
+        "9            43.961804\n",
+        "11          148.995636\n",
+        "12          131.823771\n",
+        "13            7.964562\n",
+        "14           86.237925\n",
+        "15          102.127704\n",
+        "16          131.342569\n",
+        "17           42.976850\n",
+        "18           31.552245\n",
+        "19            0.007743\n",
+        "(3, 4)      195.778450\n",
+        "(10, 20)    130.869652\n",
+        "dtype: float64"
+       ]
+      }
+     ],
+     "prompt_number": 11
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [],
      "language": "python",
      "metadata": {},
-     "outputs": []
+     "outputs": [],
+     "prompt_number": 11
     }
    ],
    "metadata": {}

diff --git a/notebooks/testing_nilmtk_V0.2.ipynb b/notebooks/testing_nilmtk_V0.2.ipynb