# EXP 4-NoisySinusoid

In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd

from nupic.encoders import ScalarEncoder
from nupic.bindings.algorithms import TemporalMemory as TM
from nupic.bindings.algorithms import SpatialPooler as SP
from htmresearch.support.neural_correlations_utils import *

random.seed(1)

In [2]:
inputSize = 109
maxItems = 15000
totalTS = maxItems

In [3]:
tm = TM(columnDimensions = (2048,),
        cellsPerColumn=8, # originally this value is 32
        initialPermanence=0.21,
        connectedPermanence=0.3,
        minThreshold=15,
        maxNewSynapseCount=40,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        activationThreshold=15,
        predictedSegmentDecrement=0.01
       )

sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)

sp = SP(inputDimensions=(inputSize,),
        columnDimensions=(2048,),
        potentialRadius = int(0.5*inputSize),
        numActiveColumnsPerInhArea = sparseCols,
        globalInhibition = True,
        synPermActiveInc = 0.0001,
        synPermInactiveDec = 0.0005,
        synPermConnected = 0.5,
        maxBoost = 1.0,
        spVerbosity = 1
       )

## Part I. Encoder

In [4]:
# Generate random data
noiseLevel = 0.25
x = np.linspace(-100, 100, maxItems)
y = np.sin(x)
noise = np.random.normal(0, noiseLevel, maxItems)

noisY = np.sin(x) + noise
plt.plot(x, noisY)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.savefig("rawData")
plt.close() 

In [5]:
numTrainingItems = 7500
trainSet = []
nonTrainSet = []

se = ScalarEncoder(n=109, w=29, minval=min(noisY), maxval=max(noisY), clipInput=True)

for i in range(maxItems):
    if i > 0 and i % 1000 == 0:
        print str(i) + " items processed"
    if i < numTrainingItems:
        trainSet.append(se.encode(noisY[i]))
    else:
        nonTrainSet.append(se.encode(noisY[i]))
print "*** All items encoded! ***"

1000 items processed
2000 items processed
3000 items processed
4000 items processed
5000 items processed
6000 items processed
7000 items processed
8000 items processed
9000 items processed
10000 items processed
11000 items processed
12000 items processed
13000 items processed
14000 items processed
*** All items encoded! ***


## Part II. Spatial Pooler

In [6]:
allSequences = []
outputColumns = np.zeros(tm.numberOfColumns(), dtype="uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")

# Set epochs for spatial-pooling:
spEpochs = 5

for epoch in range(spEpochs):
    print "Training epoch: " + str(epoch)
    
    #randomize records in training set
    randomIndex = np.random.permutation(np.arange(numTrainingItems))
    
    for i in range(numTrainingItems):
        sp.compute(trainSet[randomIndex[i]], True, outputColumns)
        # Populate array for Yuwei plot:
        for col in outputColumns.nonzero():
            columnUsage[col] += 1                        
        if epoch == (spEpochs - 1):
            allSequences.append(outputColumns.nonzero()) 

for i in range(maxItems - numTrainingItems):
    if i > 0 and i % 500 == 0:
        print str(i) + " items processed"    
    sp.compute(nonTrainSet[i], False, outputColumns)
    allSequences.append(outputColumns.nonzero())
    # Populate array for Yuwei plot:
    for col in outputColumns.nonzero():
        columnUsage[col] += 1                

print "*** All items processed! ***"

Training epoch: 0
Training epoch: 1
Training epoch: 2
Training epoch: 3
Training epoch: 4
500 items processed
1000 items processed
1500 items processed
2000 items processed
2500 items processed
3000 items processed
3500 items processed
4000 items processed
4500 items processed
5000 items processed
5500 items processed
6000 items processed
6500 items processed
7000 items processed
*** All items processed! ***


In [7]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_SP")
plt.close()

## Part III. Temporal Memory 

In [8]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
ts = 0

entropyX = []
entropyY = []

negPCCX_cells = []
negPCCY_cells = []

negPCCX_cols = []
negPCCY_cols = []

# Randomly generate the indices of the columns to keep track during simulation time
colIndices = np.random.permutation(tm.numberOfColumns())[0:4] # keep track of 4 columns


for s in range(maxItems):
    if s % 500 == 0:
        print str(s) + " items processed"
        
    tm.compute(allSequences[s][0].tolist(), learn=True)
    for cell in tm.getActiveCells():
        spikeTrains[cell, ts] = 1            
    # Obtain active columns:
    activeColumnsIndices = [tm.columnForCell(i) for i in tm.getActiveCells()]
    currentColumns = [1 if i in activeColumnsIndices else 0 for i in range(tm.numberOfColumns())]
    for col in np.nonzero(currentColumns)[0]:
        columnUsage[col] += 1                
    
    if s > 0 and s % 2500 == 0:
        print "++ Analyzing correlations (cells at random) ++"                
        subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts)
        (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
        negPCCX_cells.append(s)
        negPCCY_cells.append(numNegPCC)                
        print "++ Generating histogram ++"
        bins = 300
        plt.hist(corrMatrix.ravel(), bins, alpha=0.5)                
        # Set range for plot appropriately!
        plt.xlim(-0.05,0.1)
        plt.xlabel("PCC")
        plt.ylabel("Frequency")
        plt.savefig("cellsHist" + str(s))
        plt.close()
        # Compute entropy
        print "++ Computing entropy ++"
        entropyX.append(s)
        entropyY.append(computeEntropy(subSpikeTrains))  
        
        print "++ Analyzing correlations (whole columns) ++"                
        subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndices, tm.getCellsPerColumn(), ts)
        (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
        negPCCX_cols.append(s)
        negPCCY_cols.append(numNegPCC)
        print "++ Generating histogram ++"
        bins = 100
        plt.hist(corrMatrix.ravel(), bins, alpha=0.5)
        plt.xlabel("PCC")
        plt.ylabel("Frequency")
        plt.savefig("colsHist_" + str(s))
        plt.close() 
        print "++ Generating heatmap ++"
        plt.imshow(corrMatrix, cmap='spectral', interpolation='nearest')
        cb = plt.colorbar()
        cb.set_label('PCC')
        plt.savefig("colsHeatMap_" + str(s))
        plt.close() 

    ts += 1
                
print "***All items processed!***"

0 items processed
500 items processed
1000 items processed
1500 items processed
2000 items processed
2500 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
3000 items processed
3500 items processed
4000 items processed
4500 items processed
5000 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
5500 items processed
6000 items processed
6500 items processed
7000 items processed
7500 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
8000 items processed
8500 items processed
9000 items processed
9500 items processed

In [9]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()

plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()

In [10]:
# print computeEntropy()
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()

In [11]:
plt.hist(columnUsage)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_TM")
plt.close()

## Part IV. Analysis of Spike Trains

In [12]:
simpleAccuracyTest("periodic", tm, allSequences)

Starting from record no.: 2014
Accuracy: 0
Active cols: [ 258  327  349  375  390  448  503  520  537  544  608  712  751  755  835
  847  900  954  970 1081 1088 1134 1154 1159 1164 1281 1290 1330 1356 1533
 1552 1578 1625 1628 1681 1688 1765 1835 1952 1966]
Predicted cols: [ 117  120  126  209  281  315  327  333  337  349  372  375  382  448  503
  516  520  525  542  544  608  620  634  683  700  712  715  726  751  753
  755  756  766  776  814  826  847  878  882  900  902  933  954  970  975
  977  994  998 1002 1086 1088 1089 1096 1101 1124 1154 1159 1164 1176 1234
 1281 1286 1290 1318 1319 1330 1337 1345 1369 1378 1398 1438 1449 1552 1578
 1593 1628 1640 1676 1688 1695 1696 1714 1765 1772 1793 1800 1802 1807 1835
 1852 1863 1895 1942 1966 1979 1999 2013]

Accuracy: 0.0714285714286
Active cols: [  11  209  245  288  299  330  333  363  367  378  530  538  608  646  678
  687  834  850  866 1040 1084 1096 1157 1233 1243 1251 1258 1345 1374 1385
 1412 1438 1548 1615 1663 1695 196

In [13]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), totalTS)

In [14]:
isi = computeISI(subSpikeTrains)

250 cells processed
500 cells processed
750 cells processed
**All cells processed**


In [15]:
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
# plt.xlim(0,4000)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()

## Part V. Save TM

In [16]:
saveTM(tm)

In [None]:
# to load the TM back from the file do:
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)

## Part VI. Analysis of Input

In [17]:
overlapMatrix = inputAnalysis(allSequences, "periodic", tm.numberOfColumns())

0 rows processed
500 rows processed
1000 rows processed
1500 rows processed
2000 rows processed
2500 rows processed
3000 rows processed
3500 rows processed
4000 rows processed
4500 rows processed
5000 rows processed
5500 rows processed
6000 rows processed
6500 rows processed
7000 rows processed
7500 rows processed
8000 rows processed
8500 rows processed
9000 rows processed
9500 rows processed
10000 rows processed
10500 rows processed
11000 rows processed
11500 rows processed
12000 rows processed
12500 rows processed
13000 rows processed
13500 rows processed
14000 rows processed
14500 rows processed
***All rows processed!***


In [18]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()

# generate histogram
bins = 60
(n, bins, patches) = plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)

plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")

plt.xlim(0.5,1)
plt.ylim(0,1000000)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist_ZOOM")
plt.close()