<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#testing-the-MNIST-example" data-toc-modified-id="testing-the-MNIST-example-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>testing the MNIST example</a></span><ul class="toc-item"><li><span><a href="#nice" data-toc-modified-id="nice-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>nice</a></span></li></ul></li></ul></div>

# testing the MNIST example
There's no /data folder for the original example; it just loads the dataset from OpenML.

In [1]:
# ------------------------------------------------------------------------------
# HTM Community Edition of NuPIC
# Copyright (C) 2018-2019, David McDougall
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero Public License version 3 as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License along with
# this program.  If not, see http://www.gnu.org/licenses.
# ------------------------------------------------------------------------------
""" An MNIST classifier using Spatial Pooler."""

import random
import numpy as np
import sys

# fetch datasets from www.openML.org/ 
from sklearn.datasets import fetch_openml

from htm.bindings.algorithms import SpatialPooler, Classifier
from htm.bindings.sdr import SDR, Metrics

def load_ds(name, num_test, shape=None):
    """
    Fetch a dataset from openML.org and split it into train/test parts.

    The last `num_test` samples become the test set; all samples before them
    become the training set.

    @param name - ID on openML (eg. 'mnist_784')
    @param num_test - num. samples to take as test; must be within
                      0..number of samples
    @param shape - new shape of a single data point (ie data['data'][0]) as a
                   list. Eg. [28,28] for MNIST. The caller's list is left
                   unmodified.
    @return tuple (train_labels, train_images, test_labels, test_images)
    @raise ValueError if num_test is negative or exceeds the dataset size
    """
    data = fetch_openml(name, version=1)

    # Coerce to plain numpy arrays: depending on the scikit-learn version,
    # fetch_openml may hand back pandas containers, which cannot hold the
    # N-dimensional result of the reshape below.
    X = np.asarray(data['data'])
    y = np.asarray(data['target']).astype(np.int32)
    sz = y.shape[0]

    # A num_test outside 0..sz would turn the slices below into negative
    # indexing and silently produce a wrong (overlapping) split.
    if not 0 <= num_test <= sz:
        raise ValueError(
            "num_test must be between 0 and %d, got %r" % (sz, num_test))

    if shape is not None:
        # Build a fresh shape instead of shape.insert(0, sz): insert() returns
        # None and would grow the caller's list on every call.
        new_shape = [sz] + list(shape)
        X = np.reshape(X, new_shape)

    # split to train/test data
    split = sz - num_test
    train_labels = y[:split]
    train_images = X[:split]
    test_labels  = y[split:]
    test_images  = X[split:]

    return train_labels, train_images, test_labels, test_images

def encode(data, out):
    """
    Binarize the (image) data and store the result in an SDR.
    @param data - raw data
    @param out  - SDR that receives the encoded data via its `dense` attribute
    """
    # Threshold at the mean value: entries at or above the mean become True,
    # which converts a greyscale image to binary B/W.
    threshold = np.mean(data)
    out.dense = data >= threshold
    #TODO improve. have a look in htm.vision etc. For MNIST this is ok, for fashionMNIST it already loses too much information


# Spatial Pooler settings used by this example.
# They can be tuned further with parameter optimization
# (see py/htm/optimization/ae.py); how the parameters relate to each other
# is explained in src/examples/mnist/MNIST_CPP.cpp
default_parameters = {
    "potentialRadius": 7,
    "boostStrength": 7.0,
    "columnDimensions": (79, 79),
    "dutyCyclePeriod": 1402,
    "localAreaDensity": 0.1,
    "minPctOverlapDutyCycle": 0.2,
    "potentialPct": 0.1,
    "stimulusThreshold": 6,
    "synPermActiveInc": 0.14,
    "synPermConnected": 0.5,
    "synPermInactiveDec": 0.02,
}

# Load MNIST from OpenML, holding out 10000 samples for testing and asking
# for each sample to be reshaped to 28x28.
# NOTE(review): the original inline note here read "HTM: ~95% Fashion-MNIST";
# presumably an expected accuracy figure - confirm which dataset it refers to.
# why don't we try it on their fashion-MNIST dataset? same format etc
(train_labels,
 train_images,
 test_labels,
 test_images) = load_ds('mnist_784', 10000, shape=[28, 28])

# Pair every image with its label; only the training pairs get shuffled.
test_data = list(zip(test_images, test_labels))
training_data = list(zip(train_images, train_labels))
random.shuffle(training_data)

# build the HTM
parameters = default_parameters  # alias of the defaults, not a copy
enc = SDR(train_images[0].shape)  # encoder output SDR, shaped like one image

# Notebook inspection: bare expressions whose values are not used by the code
# that follows.
type(enc)

enc.dimensions

# Encoder output SDR, shaped like a single training image.
enc = SDR(train_images[0].shape)

# Spatial Pooler: tunable values come from `parameters`, the rest are fixed
# for this example.
sp = SpatialPooler(
    # topology
    inputDimensions=enc.dimensions,
    columnDimensions=parameters['columnDimensions'],
    potentialRadius=parameters['potentialRadius'],
    potentialPct=parameters['potentialPct'],
    wrapAround=False,
    # inhibition
    globalInhibition=True,
    localAreaDensity=parameters['localAreaDensity'],
    stimulusThreshold=int(round(parameters['stimulusThreshold'])),
    # permanence learning
    synPermConnected=parameters['synPermConnected'],
    synPermActiveInc=parameters['synPermActiveInc'],
    synPermInactiveDec=parameters['synPermInactiveDec'],
    # duty cycles and boosting
    minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
    dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
    boostStrength=parameters['boostStrength'],
    # this is important, 0="random" seed which changes on each invocation
    seed=0,
    spVerbosity=99,
)
columns = SDR(sp.getColumnDimensions())  # output SDR, one bit per SP column
columns_stats = Metrics(columns, 99999999)
sdrc = Classifier()
# NOTE: a hand-typed version of this cell first failed on the lowercase
# `enc.dimensions` and then kept raising a mismatched-parameter error; the
# copy-pasted version ran fine, so the cause was a typo in the retyped code.

# training loop
# Walk the shuffled (image, label) pairs directly rather than indexing
# training_data with range(len(train_images)), which relied on two separate
# sequences having the same length.
for img, lbl in training_data:
    encode(img, enc)  # write the binarized image into the encoder SDR
    sp.compute(enc, True, columns)  # spatial pool with learning enabled
    # TODO: can the classifier accept string labels, or only int labels?
    sdrc.learn(columns, lbl)
print(str(sp))
print(str(columns_stats))

# testing loop
# Count the test images whose label matches the classifier's top-scoring
# class, then report that count as a percentage.
score = 0
for img, lbl in test_data:
    encode(img, enc)  # write the binarized test image into the encoder SDR
    sp.compute(enc, False, columns)  # spatial pool with learning disabled
    predicted = np.argmax(sdrc.infer(columns))  # index of the highest value
    if lbl == predicted:
        score += 1
score /= len(test_data)
print('__score__:  ', 100*score, '%')


------------CPP SpatialPooler Parameters ------------------
iterationNum                = 0
iterationLearnNum           = 0
numInputs                   = 784
numColumns                  = 6241
numActiveColumnsPerInhArea  = 0
potentialPct                = 0.1
globalInhibition            = 1
localAreaDensity            = 0.1
stimulusThreshold           = 6
synPermActiveInc            = 0.14
synPermInactiveDec          = 0.02
synPermConnected            = 0.5
minPctOverlapDutyCycles     = 0.2
dutyCyclePeriod             = 1402
boostStrength               = 7
spVerbosity                 = 99
wrapAround                  = 0
version                     = 2
CPP SP seed                 = 0
Spatial Pooler Connections:
    Inputs (784) ~> Outputs (6241) via Segments (6241)
    Segments on Cell Min/Mean/Max 1 / 1 / 1
    Potential Synapses on Segment Min/Mean/Max 6 / 17.1349 / 23
    Connected Synapses on Segment Min/Mean/Max 6 / 10.6491 / 23
    Synapses Dead (0.31864%) Saturated (0.428263%)
   

## nice
Runs just fine even from a notebook outside the source tree.

Next steps: tinker with the black-and-white encoding to get color, and try it on the Fashion-MNIST dataset.