Merge pull request #1346 from chetan51/issue-1329_metrics

Add sequence metrics to TemporalMemoryInspectMixin
numenta · Sep 21, 2014 · 6308226 · 6308226
2 parents 58b98c8 + d35a7c6
commit 6308226
Show file tree

Hide file tree

Showing 3 changed files with 144 additions and 19 deletions.
diff --git a/nupic/research/temporal_memory_inspect_mixin.py b/nupic/research/temporal_memory_inspect_mixin.py
@@ -23,7 +23,7 @@
 Temporal Memory mixin that enables detailed inspection of history.
 """
 
-from collections import namedtuple
+from collections import defaultdict, namedtuple
 
 import numpy
 from prettytable import PrettyTable
@@ -46,6 +46,7 @@ def __init__(self, *args, **kwargs):
     self.predictedActiveColumnsList = None
     self.predictedInactiveColumnsList = None
     self.unpredictedActiveColumnsList = None
+    self.predictedActiveCellsForSequenceDict = None
     self.clearHistory()
 
 
@@ -57,6 +58,7 @@ def clearHistory(self):
     self.predictedActiveColumnsList = []
     self.predictedInactiveColumnsList = []
     self.unpredictedActiveColumnsList = []
+    self.predictedActiveCellsForSequenceDict = defaultdict(set)
 
 
   def prettyPrintHistory(self, verbosity=0):
@@ -70,10 +72,10 @@ def prettyPrintHistory(self, verbosity=0):
     cols = ["#",
             "Pattern",
             "Sequence Label",
-            "pred=>active columns",
+            "pred=>active columns (correctly predicted)",
             "pred=>inactive columns",
-            "unpred=>active columns",
-            "pred=>active cells",
+            "unpred=>active columns (bursting)",
+            "pred=>active cells (correctly predicted)",
             "pred=>inactive cells"]
 
     if verbosity == 0:
@@ -120,8 +122,6 @@ def prettyPrintConnections(self):
     """
     Pretty print the connections in the temporal memory.
 
-    @param verbosity (int) Verbosity level
-
     @return (string) Pretty-printed text
     """
     text = ""
@@ -161,6 +161,25 @@ def prettyPrintConnections(self):
     return text
 
 
+  def prettyPrintSequenceCellRepresentations(self, sortby="Column"):
+    """
+    Pretty print the cell representations for sequences in the history.
+
+    @param sortby (string) Name of the table column to sort the rows by
+
+    @return (string) Pretty-printed text
+    """
+    table = PrettyTable(["Pattern", "Column", "predicted=>active cells"])
+
+    for sequenceLabel, predictedActiveCells in (
+          self.predictedActiveCellsForSequenceDict.iteritems()):
+      cellsForColumn = self.mapCellsToColumns(predictedActiveCells)
+      for column, cells in cellsForColumn.iteritems():
+        table.add_row([sequenceLabel, column, list(cells)])
+
+    return table.get_string(sortby=sortby)
+
+
   def getStatistics(self):
     """
     Returns statistics for the history, as a named tuple with the following
@@ -171,6 +190,11 @@ def getStatistics(self):
         - `predictedActiveColumns`
         - `predictedInactiveColumns`
         - `unpredictedActiveColumns`
+        - `sequencesPredictedActiveCellsPerColumn`
+        - `sequencesPredictedActiveCellsShared`
+
+    Note: the `sequencesPredictedActiveCellsShared` metric is flawed for
+    high-order sequences.
 
     Each element in the tuple is a named tuple with the following fields:
 
@@ -190,34 +214,76 @@ def getStatistics(self):
       'predictedInactiveCells',
       'predictedActiveColumns',
       'predictedInactiveColumns',
-      'unpredictedActiveColumns'
+      'unpredictedActiveColumns',
+      'sequencesPredictedActiveCellsPerColumn',
+      'sequencesPredictedActiveCellsShared'
     ])
+
     Data = namedtuple('Data', [
       'min',
       'max',
       'sum',
       'average',
       'standardDeviation'
     ])
-    def statsForResult(result):
-      counts = [len(x) for idx, x in enumerate(result)
-                if (idx > 0 and
-                    self.patterns[idx] is not None and
-                    self.patterns[idx-1] is not None)]
+
+    def statsForCounts(counts):
+      if len(counts) == 0:
+        return Data._make([None] * 5)
       return Data(min(counts),
                   max(counts),
                   sum(counts),
                   numpy.mean(counts),
                   numpy.std(counts))
 
+    def statsForHistoryItem(historyItem):
+      counts = [len(x) for idx, x in enumerate(historyItem)
+                if (idx > 0 and
+                    self.patterns[idx] is not None and
+                    self.patterns[idx-1] is not None)]
+      return statsForCounts(counts)
+
     history = (
       self.predictedActiveCellsList,
       self.predictedInactiveCellsList,
       self.predictedActiveColumnsList,
       self.predictedInactiveColumnsList,
       self.unpredictedActiveColumnsList
     )
-    return Stats._make([statsForResult(result) for result in history])
+    stats = [statsForHistoryItem(item) for item in history]
+
+    numCellsPerColumn = []
+    numSequencesForCell = defaultdict(lambda: 0)
+
+    for predictedActiveCells in (
+        self.predictedActiveCellsForSequenceDict.values()):
+      cellsForColumn = self.mapCellsToColumns(predictedActiveCells)
+      numCellsPerColumn += [len(x) for x in cellsForColumn.values()]
+
+      for cell in predictedActiveCells:
+        numSequencesForCell[cell] += 1
+
+    stats.append(statsForCounts(numCellsPerColumn))
+    stats.append(statsForCounts(numSequencesForCell.values()))
+
+    return Stats._make(stats)
+
+
+  def mapCellsToColumns(self, cells):
+    """
+    Maps cells to the columns they belong to.
+
+    @param cells (set) Cells
+
+    @return (dict) Mapping from columns to their cells in `cells`
+    """
+    cellsForColumns = defaultdict(set)
+
+    for cell in cells:
+      column = self.connections.columnForCell(cell)
+      cellsForColumns[column].add(cell)
+
+    return cellsForColumns
 
 
   # ==============================
@@ -257,6 +323,10 @@ def _record(self, activeColumns, sequenceLabel):
       if prevPredictedColumn in activeColumns:
         predictedActiveCells.add(prevPredictedCell)
         predictedActiveColumns.add(prevPredictedColumn)
+
+        if sequenceLabel is not None:
+          self.predictedActiveCellsForSequenceDict[sequenceLabel].add(
+            prevPredictedCell)
       else:
         predictedInactiveCells.add(prevPredictedCell)
         predictedInactiveColumns.add(prevPredictedColumn)

diff --git a/tests/integration/py2/nupic/algorithms/extensive_temporal_memory_test.py b/tests/integration/py2/nupic/algorithms/extensive_temporal_memory_test.py
@@ -520,11 +520,13 @@ def setUpClass(cls):
   @classmethod
   def tearDownClass(cls):
     cols = ["Test",
-            "predicted active cells (stats)",
-            "predicted inactive cells (stats)",
-            "predicted active columns (stats)",
-            "predicted inactive columns (stats)",
-            "unpredicted active columns (stats)"]
+            "# of predicted active cells (stats)",
+            "# of predicted inactive cells (stats)",
+            "# of predicted active columns (stats)",
+            "# of predicted inactive columns (stats)",
+            "# of unpredicted active columns (stats)",
+            "# of predicted active cells per column in each sequence (stats)",
+            "# of sequences each predicted active cell shows up in"]
 
     table = PrettyTable(cols)
 

diff --git a/tests/integration/py2/nupic/algorithms/inspect_temporal_memory_test.py b/tests/integration/py2/nupic/algorithms/inspect_temporal_memory_test.py
@@ -69,6 +69,29 @@ def testFeedSequence(self):
     self.assertEqual(len(self.tm.predictedInactiveColumnsList[-2]), 5)
     self.assertEqual(len(self.tm.unpredictedActiveColumnsList[-2]), 5)
 
+    self.assertTrue("Test" in self.tm.predictedActiveCellsForSequenceDict)
+    predictedActiveCells = reduce(lambda x, y: x | y,
+                                  self.tm.predictedActiveCellsList)
+    self.assertEqual(self.tm.predictedActiveCellsForSequenceDict["Test"],
+                     predictedActiveCells)
+
+    sequence.reverse()
+    sequence.append(sequence.pop(0))  # Move None (reset) to the end
+    self._feedSequence(sequence, sequenceLabel="Test2")
+
+    self.assertTrue("Test" in self.tm.predictedActiveCellsForSequenceDict)
+    self.assertEqual(self.tm.predictedActiveCellsForSequenceDict["Test"],
+                     predictedActiveCells)
+    self.assertTrue("Test2" in self.tm.predictedActiveCellsForSequenceDict)
+    self.assertNotEqual(self.tm.predictedActiveCellsForSequenceDict["Test"],
+                        self.tm.predictedActiveCellsForSequenceDict["Test2"])
+
+
+  def testFeedSequenceNoSequenceLabel(self):
+    sequence = self._generateSequence()
+    self._feedSequence(sequence)
+    self.assertEqual(len(self.tm.predictedActiveCellsForSequenceDict), 0)
+
 
   def testComputeStatistics(self):
     sequence = self._generateSequence()
@@ -78,10 +101,40 @@ def testComputeStatistics(self):
     self._feedSequence(sequence)  # test
     stats = self.tm.getStatistics()
 
-    self.assertEqual(len(stats), 5)
+    self.assertEqual(len(stats), 7)
     self.assertEqual(stats.predictedInactiveCells.sum, 0)
     self.assertEqual(stats.predictedInactiveColumns.sum, 0)
     self.assertEqual(stats.unpredictedActiveColumns.sum, 0)
+    self.assertEqual(stats.sequencesPredictedActiveCellsPerColumn.sum, None)
+    self.assertEqual(stats.sequencesPredictedActiveCellsShared.sum, None)
+
+
+  def testComputeStatisticsSequenceStatistics(self):
+    sequence = self._generateSequence()
+    self._feedSequence(sequence, "Test1")
+
+    sequence.reverse()
+    sequence.append(sequence.pop(0))  # Move None (reset) to the end
+    self._feedSequence(sequence, "Test2")
+
+    stats = self.tm.getStatistics()
+
+    self.assertEqual(stats.sequencesPredictedActiveCellsPerColumn.average, 1)
+    self.assertEqual(stats.sequencesPredictedActiveCellsShared.average, 1)
+
+    self._feedSequence(sequence, "Test3")
+
+    stats = self.tm.getStatistics()
+
+    self.assertEqual(stats.sequencesPredictedActiveCellsPerColumn.average, 1)
+    self.assertTrue(stats.sequencesPredictedActiveCellsShared.average > 1)
+
+
+  def testMapCellsToColumns(self):
+    columnsForCells = self.tm.mapCellsToColumns(set([0, 1, 2, 5, 399]))
+    self.assertEqual(columnsForCells[0], set([0, 1, 2]))
+    self.assertEqual(columnsForCells[1], set([5]))
+    self.assertEqual(columnsForCells[99], set([399]))
 
 
   # ==============================