Skip to content
This repository has been archived by the owner on Sep 1, 2023. It is now read-only.

Commit

Permalink
DEVOPS-353: Implement sparse links
Browse files Browse the repository at this point in the history
  • Loading branch information
lscheinkman committed Mar 6, 2018
1 parent de88baa commit 9ae6155
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 3 deletions.
28 changes: 28 additions & 0 deletions bindings/py/src/nupic/bindings/regions/PyRegion.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,3 +406,31 @@ def executeMethod(self, methodName, args):
raise Exception('Command: ' + methodName + ' must be callable')

return m(*args)

@staticmethod
def setSparseOutput(outputs, name, value):
  """
  Set region sparse output value.

  The region output memory is owned by the c++ caller and cannot be changed
  directly from python. Use this method to update the sparse output fields in
  the "outputs" array so it can be resized from the c++ code.

  :param outputs: (dict) of numpy arrays (one per output)
  :param name: (string) name of output
  :param value: (object) the sparse array to assign to the output
  :raises Exception: if "outputs" has no "__{name}_len__" entry for this
                     output, or if "value" has more elements than the
                     output buffer can hold
  """
  # The region output memory is owned by the c++ and cannot be changed from
  # python. We use a special attribute named "__{name}_len__" to pass
  # the sparse array length back to c++
  lenAttr = "__{}_len__".format(name)
  if lenAttr not in outputs:
    raise Exception("Output {} is not a valid sparse output".format(name))

  # The output buffer cannot grow from python; the value must fit in it
  capacity = outputs[name].size
  if capacity < value.size:
    # All three placeholders need an argument, otherwise str.format raises
    # IndexError and hides the real error from the caller
    raise Exception(
      "Output {} must be less than {}. Given value size is {}".format(
        name, capacity, value.size))

  # Publish the new length, then copy the values into the head of the buffer
  outputs[lenAttr][0] = value.size
  outputs[name][:value.size] = value
229 changes: 229 additions & 0 deletions bindings/py/tests/sparse_link_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2018, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
import unittest

import numpy as np
import numpy.testing
from nupic.bindings.regions.PyRegion import PyRegion
from nupic.engine import Network

# Indices of the active bits: the sparse form of the test pattern
TEST_DATA_SPARSE = np.array([4, 7])
# Number of active bits in the test pattern
MAX_ACTIVE = TEST_DATA_SPARSE.size
# Width of the dense form of the test pattern
OUTPUT_WIDTH = 10
# Dense form of the same pattern: True at every index in TEST_DATA_SPARSE.
# The builtin "bool" is used because the "np.bool" alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
TEST_DATA_DENSE = np.zeros(OUTPUT_WIDTH, dtype=bool)
TEST_DATA_DENSE[TEST_DATA_SPARSE] = True


class SparseRegion(PyRegion):
  """
  Test region working on sparse data: it copies its sparse input to its
  sparse output, or emits the canned test pattern when it has no input.

  :param maxActive: Max active bits in the sparse data
  :param outputWidth: Size of output vector
  """

  def __init__(self, maxActive, outputWidth, **kwargs):
    PyRegion.__init__(self, **kwargs)

    self.outputWidth = outputWidth
    self.maxActive = maxActive

  @classmethod
  def getSpec(cls):
    """Return the region spec: one sparse UInt32 input and output"""
    sparseIn = {
      "description": "Sparse Data In",
      "dataType": "UInt32",
      "isDefaultInput": True,
      "required": False,
      "sparse": True,
      "count": 0
    }
    sparseOut = {
      "description": "Sparse Data Out",
      "dataType": "UInt32",
      "isDefaultOutput": True,
      "sparse": True,
      "count": 0
    }
    maxActiveParam = {
      "description": "Max active bits in the sparse data",
      "dataType": "UInt32",
      "accessMode": "ReadWrite",
      "count": 1,
      "constraints": ""
    }
    outputWidthParam = {
      "description": "Size of output vector",
      "dataType": "UInt32",
      "accessMode": "ReadWrite",
      "count": 1,
      "constraints": ""
    }
    return {
      "description": "Sparse Region",
      "singleNodeOnly": True,
      "inputs": {"dataIn": sparseIn},
      "outputs": {"dataOut": sparseOut},
      "parameters": {
        "maxActive": maxActiveParam,
        "outputWidth": outputWidthParam
      }
    }

  def compute(self, inputs, outputs):
    """Forward "dataIn" when present, otherwise the data set by initialize"""
    sparseData = inputs["dataIn"] if "dataIn" in inputs else self.data
    PyRegion.setSparseOutput(outputs, "dataOut", sparseData)

  def initialize(self):
    """Load the sparse test pattern used when the region has no input"""
    self.data = TEST_DATA_SPARSE

  def getOutputElementCount(self, name):
    """Every output is "outputWidth" elements wide"""
    return self.outputWidth


class DenseRegion(PyRegion):
  """
  Test region working on dense data: it copies its dense input to its
  dense output, or emits the canned test pattern when it has no input.

  :param maxActive: Max active bits in the sparse data
  :param outputWidth: Size of output vector
  """

  def __init__(self, maxActive, outputWidth, **kwargs):
    PyRegion.__init__(self, **kwargs)

    self.outputWidth = outputWidth
    self.maxActive = maxActive

  @classmethod
  def getSpec(cls):
    """Return the region spec: one dense Bool input and output"""
    denseIn = {
      "description": "Dense Data In",
      "dataType": "Bool",
      "isDefaultInput": True,
      "required": False,
      "count": 0
    }
    denseOut = {
      "description": "Dense Data Out",
      "dataType": "Bool",
      "isDefaultOutput": True,
      "count": 0
    }
    maxActiveParam = {
      "description": "Max active bits in the sparse data",
      "dataType": "UInt32",
      "accessMode": "ReadWrite",
      "count": 1,
      "constraints": ""
    }
    outputWidthParam = {
      "description": "Size of output vector",
      "dataType": "UInt32",
      "accessMode": "ReadWrite",
      "count": 1,
      "constraints": ""
    }
    return {
      "description": "Dense Region",
      "singleNodeOnly": True,
      "inputs": {"dataIn": denseIn},
      "outputs": {"dataOut": denseOut},
      "parameters": {
        "maxActive": maxActiveParam,
        "outputWidth": outputWidthParam
      }
    }

  def compute(self, inputs, outputs):
    """Forward "dataIn" when present, otherwise the data set by initialize"""
    denseData = inputs["dataIn"] if "dataIn" in inputs else self.data
    outputs["dataOut"][:] = denseData

  def initialize(self):
    """Load the dense test pattern used when the region has no input"""
    self.data = TEST_DATA_DENSE

  def getOutputElementCount(self, name):
    """Every output is "outputWidth" elements wide"""
    return self.outputWidth


def createNetwork(fromRegion, toRegion):
  """
  Create the two-region test network.

  Adds a region named "from" and a region named "to", both configured with
  MAX_ACTIVE and OUTPUT_WIDTH, and joins them with a "UniformLink".

  :param fromRegion: (string) node type of the source region
  :param toRegion: (string) node type of the destination region
  :return: the assembled Network
  """
  net = Network()
  regionParams = str({"maxActive": MAX_ACTIVE, "outputWidth": OUTPUT_WIDTH})
  for regionName, nodeType in (("from", fromRegion), ("to", toRegion)):
    net.addRegion(regionName, nodeType, regionParams)

  net.link("from", "to", "UniformLink", "")
  return net


class SparseLinkTest(unittest.TestCase):
  """Test sparse link"""
  __name__ = "SparseLinkTest"

  def setUp(self):
    """Register test regions"""
    for regionClass in (SparseRegion, DenseRegion):
      Network.registerPyRegion(regionClass.__module__, regionClass.__name__)

  def _checkLink(self, fromRegion, toRegion, expected):
    """
    Run a "from" -> "to" network for one iteration and compare the "dataOut"
    output of the "to" region with the expected data.
    The comparison is done here so the network is still referenced while its
    output is being inspected.
    """
    net = createNetwork(fromRegion, toRegion)
    net.initialize()
    net.run(1)

    actual = net.regions["to"].getOutputData("dataOut")
    np.testing.assert_array_equal(actual, expected)

  def testSparseToSparse(self):
    """Test links between sparse to sparse"""
    self._checkLink("py.SparseRegion", "py.SparseRegion", TEST_DATA_SPARSE)

  def testSparseToDense(self):
    """Test links between sparse to dense"""
    self._checkLink("py.SparseRegion", "py.DenseRegion", TEST_DATA_DENSE)

  def testDenseToSparse(self):
    """Test links between dense to sparse"""
    self._checkLink("py.DenseRegion", "py.SparseRegion", TEST_DATA_SPARSE)

  def testDenseToDense(self):
    """Test links between dense to dense"""
    self._checkLink("py.DenseRegion", "py.DenseRegion", TEST_DATA_DENSE)


# Run the test cases in this module when the file is executed as a script
if __name__ == '__main__':
  unittest.main()
58 changes: 55 additions & 3 deletions src/nupic/engine/Link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@

namespace nupic {

// Represents 'zero' scalar value used to compare Input/Output buffer contents
// for non-zero values. Link::compute compares the first "typeSize" bytes of
// this constant against each source element with memcmp, so any element
// whose bytes are not all zero is treated as non-zero.
// NOTE(review): a floating point negative zero is presumably not all-zero
// bytes and would then count as non-zero - confirm whether that matters.
const static NTA_Real64 ZERO_VALUE = 0;

Link::Link(const std::string &linkType, const std::string &linkParams,
const std::string &srcRegionName, const std::string &destRegionName,
const std::string &srcOutputName, const std::string &destInputName,
Expand Down Expand Up @@ -329,8 +333,8 @@ void Link::compute() {

const Array &dest = dest_->getData();

size_t srcSize = src.getBufferSize();
size_t typeSize = BasicType::getSize(src.getType());
size_t srcSize = src.getCount() * typeSize;
size_t destByteOffset = destOffset_ * typeSize;

if (_LINK_DEBUG) {
Expand All @@ -339,8 +343,56 @@ void Link::compute() {
<< " elements=" << src;
}

::memcpy((char *)(dest.getBuffer()) + destByteOffset, src.getBuffer(),
srcSize);
if (src_->isSparse() == dest_->isSparse()) {
// No conversion required, just copy the buffer over
::memcpy((char *)(dest.getBuffer()) + destByteOffset, src.getBuffer(),
srcSize);
if (dest_->isSparse()) {
// Remove 'const' to update the variable length array
const_cast<Array &>(dest).setCount(src.getCount());
}
} else if (dest_->isSparse()) {
// Destination is sparse, convert source from dense to sparse

// Sparse Output must be NTA_UInt32. See "initialize".
NTA_UInt32 *destBuf =
(NTA_UInt32 *)((char *)(dest.getBuffer()) + destByteOffset);

// Dense source can be any scalar type. The scalar values will be lost
// and only the indexes of the non-zero values will be stored.
char *srcBuf = (char *)src.getBuffer();
size_t destLen = dest.getBufferSize();
size_t destIdx = 0;
for (size_t i = 0; i < srcSize; i++) {
// Check for any non-zero scalar value
if (::memcmp(srcBuf + i * typeSize, &ZERO_VALUE, typeSize)) {
NTA_CHECK(destIdx < destLen) << "Link destination is too small. "
<< "It should be at least " << destIdx + 1;
destBuf[destIdx++] = i;
}
}
// Remove 'const' to update the variable length array
const_cast<Array &>(dest).setCount(destIdx);
} else {
// Destination is dense, convert source from sparse to dense

// Sparse Input must be NTA_UInt32. See "initialize".
NTA_UInt32 *srcBuf = (NTA_UInt32 *)src.getBuffer();

// Dense destination links must be bool. See "initialize".
bool *destBuf = (bool *)((char *)dest.getBuffer() + destByteOffset);

size_t srcLen = src.getCount();
size_t destLen = dest.getBufferSize();
::memset(destBuf, 0, destLen * sizeof(bool));
size_t destIdx;
for (size_t i = 0; i < srcLen; i++) {
destIdx = srcBuf[i];
NTA_CHECK(destIdx < destLen) << "Link destination is too small. "
<< "It should be at least " << destIdx + 1;
destBuf[destIdx] = true;
}
}
}

void Link::shiftBufferedData() {
Expand Down
34 changes: 34 additions & 0 deletions src/nupic/regions/PyRegion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,20 @@ void PyRegion::compute() {

// Insert the buffer to the outputs py::Dict
outputs.setItem(p.first, numpyArray);

// Add sparse output len placeholder field
if (out->isSparse()) {
// The region output memory is owned by the c++ and cannot be changed from
// python. We use a special attribute named "__{name}_len__" to pass
// the sparse array length back to c++
std::stringstream name;
name << "__" << p.first << "_len__";

// The outputs dict is immutable. Use a list to enable update from python
py::List len;
len.append(py::Int(data.getCount()));
outputs.setItem(name.str(), len);
}
}

// Call the Python compute() method
Expand All @@ -945,6 +959,26 @@ void PyRegion::compute() {

// Need to put the None result in py::Ptr to decrement the ref count
py::Ptr none(node_.invoke("guardedCompute", args));

// Resize sparse outputs
for (size_t i = 0; i < ns.outputs.getCount(); ++i) {
const std::pair<std::string, OutputSpec> &p = ns.outputs.getByIndex(i);
// Get the corresponding output buffer
Output *out = region_->getOutput(p.first);
// Skip optional outputs
if (!out)
continue;

if (out->isSparse()) {
std::stringstream name;
name << "__" << p.first << "_len__";
py::List len(outputs.getItem(name.str()));

// Remove 'const' to update the variable length array
Array &data = const_cast<Array &>(out->getData());
data.setCount(py::Int(len.getItem(0)));
}
}
}

//
Expand Down

0 comments on commit 9ae6155

Please sign in to comment.