# EXP 3-C

In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd

from nupic.encoders import ScalarEncoder
from nupic.bindings.algorithms import TemporalMemory as TM
from nupic.bindings.algorithms import SpatialPooler as SP
from htmresearch.support.neural_correlations_utils import *

uintType = "uint32"
random.seed(1)

In [2]:
inputSize = 109
maxItems = 17520
totalTS = maxItems

In [3]:
# read csv file
df = pd.read_csv('nyc_taxi.csv', skiprows=[1, 2])

In [4]:
tm = TM(columnDimensions = (2048,),
        cellsPerColumn=8, # We changed here the number of cells per col, initially they were 32
        initialPermanence=0.21,
        connectedPermanence=0.3,
        minThreshold=15,
        maxNewSynapseCount=40,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        activationThreshold=15,
        predictedSegmentDecrement=0.01
       )

sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)

sp = SP(inputDimensions=(inputSize,),
        columnDimensions=(2048,),
        potentialRadius = int(0.5*inputSize),
        numActiveColumnsPerInhArea = sparseCols,
        globalInhibition = True,
        synPermActiveInc = 0.0001,
        synPermInactiveDec = 0.0005,
        synPermConnected = 0.5,
        maxBoost = 1.0,
        spVerbosity = 1
       )

## Part I. Encoder

In [5]:
rawValues = []
remainingRows = maxItems
numTrainingItems = 15000
trainSet = []
nonTrainSet = []

se = ScalarEncoder(n=109, w=29, minval=0, maxval=40000, clipInput=True)
s = 0

for index, row in df.iterrows():
    if s > 0 and s % 500 == 0:
        print str(s) + " items processed"
        
    rawValues.append(row['passenger_count'])
    
    if s < numTrainingItems:
        trainSet.append(se.encode(row['passenger_count']))
    else:
        nonTrainSet.append(se.encode(row['passenger_count']))
        
    remainingRows -= 1
    s += 1
    if remainingRows == 0: 
        break
print "*** All items encoded! ***"

500 items processed
1000 items processed
1500 items processed
2000 items processed
2500 items processed
3000 items processed
3500 items processed
4000 items processed
4500 items processed
5000 items processed
5500 items processed
6000 items processed
6500 items processed
7000 items processed
7500 items processed
8000 items processed
8500 items processed
9000 items processed
9500 items processed
10000 items processed
10500 items processed
11000 items processed
11500 items processed
12000 items processed
12500 items processed
13000 items processed
13500 items processed
14000 items processed
14500 items processed
15000 items processed
15500 items processed
16000 items processed
16500 items processed
17000 items processed
17500 items processed
*** All items encoded! ***


## Part II. Spatial Pooler

In [6]:
allSequences = []
outputColumns = np.zeros(sp.getNumColumns(), dtype="uint32")
columnUsage = np.zeros(sp.getNumColumns(), dtype="uint32")

# Set epochs for spatial-pooling:
spEpochs = 4

for epoch in range(spEpochs):
    print "Training epoch: " + str(epoch)
    
    #randomize records in training set
    randomIndex = np.random.permutation(np.arange(numTrainingItems))
    
    for i in range(numTrainingItems):
        sp.compute(trainSet[randomIndex[i]], True, outputColumns)
        # Populate array for Yuwei plot:
        for col in outputColumns.nonzero():
            columnUsage[col] += 1                        
        if epoch == (spEpochs - 1):
            allSequences.append(outputColumns.nonzero()) 

for i in range(maxItems - numTrainingItems):
    if i > 0 and i % 500 == 0:
        print str(i) + " items processed"    
    sp.compute(nonTrainSet[i], False, outputColumns)
    allSequences.append(outputColumns.nonzero())
    # Populate array for Yuwei plot:
    for col in outputColumns.nonzero():
        columnUsage[col] += 1                

print "*** All items processed! ***"

Training epoch: 0
Training epoch: 1
Training epoch: 2
Training epoch: 3
500 items processed
1000 items processed
1500 items processed
2000 items processed
2500 items processed
*** All items processed! ***


In [9]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_SP")
plt.close()

## Part III. Temporal Memory 

In [7]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
ts = 0

entropyX = []
entropyY = []

negPCCX_cells = []
negPCCY_cells = []

negPCCX_cols = []
negPCCY_cols = []

# Randomly generate the indices of the columns to keep track during simulation time
colIndices = np.random.permutation(tm.numberOfColumns())[0:4] # keep track of 4 columns

for s in range(maxItems):
    if s % 500 == 0:
        print str(s) + " items processed"
        
    tm.compute(allSequences[s][0].tolist(), learn=True)
    for cell in tm.getActiveCells():
        spikeTrains[cell, ts] = 1            
    # Obtain active columns:
    activeColumnsIndices = [tm.columnForCell(i) for i in tm.getActiveCells()]
    currentColumns = [1 if i in activeColumnsIndices else 0 for i in range(tm.numberOfColumns())]
    for col in np.nonzero(currentColumns)[0]:
        columnUsage[col] += 1                
    
    if s > 0 and s % 2500 == 0:
        print "++ Analyzing correlations (cells at random) ++"                
        subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts)
        (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
        negPCCX_cells.append(s)
        negPCCY_cells.append(numNegPCC)                
        print "++ Generating histogram ++"
        bins = 300
        plt.hist(corrMatrix.ravel(), bins, alpha=0.5)                
        # Set range for plot appropriately!
        plt.xlim(-0.05,0.1)
        plt.xlabel("PCC")
        plt.ylabel("Frequency")
        plt.savefig("cellsHist" + str(s))
        plt.close()
        # Compute entropy
        print "++ Computing entropy ++"
        entropyX.append(s)
        entropyY.append(computeEntropy(subSpikeTrains))  
        
        print "++ Analyzing correlations (whole columns) ++"                
        subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndices, tm.getCellsPerColumn(), ts)
        (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
        negPCCX_cols.append(s)
        negPCCY_cols.append(numNegPCC)                
        print "++ Generating histogram ++"
        bins = 100
        plt.hist(corrMatrix.ravel(), bins, alpha=0.5)
        plt.xlabel("PCC")
        plt.ylabel("Frequency")
        plt.savefig("colsHist_" + str(s))
        plt.close() 
        print "++ Generating heatmap ++"
        plt.imshow(corrMatrix, cmap='spectral', interpolation='nearest')
        cb = plt.colorbar()
        cb.set_label('PCC')
        plt.savefig("colsHeatMap_" + str(s))
        plt.close() 
        
    ts += 1
                
print "***All items processed!***"

0 items processed
500 items processed
1000 items processed
1500 items processed
2000 items processed
2500 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
3000 items processed
3500 items processed
4000 items processed
4500 items processed
5000 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
5500 items processed
6000 items processed
6500 items processed
7000 items processed
7500 items processed
++ Analyzing correlations (cells at random) ++
++ Generating histogram ++
++ Computing entropy ++
++ Analyzing correlations (whole columns) ++
++ Generating histogram ++
++ Generating heatmap ++
8000 items processed
8500 items processed
9000 items processed
9500 items processed

In [10]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()

plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()

In [11]:
# print computeEntropy()
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()

In [12]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_TM")
plt.close()

## Part IV. Analysis of Spike Trains

In [8]:
simpleAccuracyTest("periodic", tm, allSequences)

2352
Accuracy: 0
Active cols: [  13   34   60  166  172  191  204  206  228  251  261  279  377  395  478
  480  535  572  725  838  883 1061 1186 1189 1217 1348 1364 1384 1460 1562
 1588 1667 1670 1680 1686 1782 1812 1878 1912 1963]
Predicted cols: [  13   34   60  109  132  147  149  166  172  191  197  200  204  206  219
  228  251  258  261  279  315  332  377  395  405  427  478  480  534  535
  572  663  725  754  772  801  838  876  883  929  965 1038 1061 1093 1100
 1186 1187 1189 1202 1217 1238 1257 1263 1266 1301 1348 1364 1384 1429 1450
 1460 1461 1552 1560 1562 1575 1588 1639 1646 1667 1670 1680 1699 1720 1738
 1742 1764 1770 1782 1791 1812 1856 1878 1882 1912 1916 1963 2020 2029 2037
 2045 2046]

Accuracy: 0.0
Active cols: [  83  174  185  287  368  453  517  540  697  721  742  762  799  805  893
  925  931  949 1041 1050 1069 1085 1132 1183 1210 1236 1259 1280 1357 1395
 1426 1481 1618 1717 1795 1837 1845 1863 1903 2002]
Predicted cols: [  13   34   60   65  149  191  20

In [13]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), totalTS)

In [14]:
isi = computeISI(subSpikeTrains)

250 cells processed
500 cells processed
750 cells processed
**All cells processed**


In [15]:
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
# plt.xlim(0,4000)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()

## Part V. Save TM

In [16]:
saveTM(allSequences, tm)

NameError: name 'saveTM' is not defined

In [None]:
# to load the TM back from the file do:
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)
# to load sequences from text file do:
with open('sequences.txt', 'r') as f:
    allSequences = json.load(f)

## Part VI. Analysis of Input

In [None]:
overlapMatrix = inputAnalysis(allSequences, "periodic")

In [None]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()

# generate histogram
bins = 60
(n, bins, patches) = plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)

plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")

plt.xlim(0.5,1)
plt.ylim(0,1000000)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist_ZOOM")
plt.close()