# In [None]:
import csv
import datetime
import numpy
import os
import time

from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.algorithms.anomaly import computeRawAnomalyScore
from nupic.algorithms.anomaly import Anomaly
from nupic.encoders.date import DateEncoder
from nupic.encoders.adaptive_scalar import AdaptiveScalarEncoder

from model_params import MODEL_PARAMS

# Upper bound on the number of data records processed when this file is run
# as a script (passed to runHotgym as numRecords).
_NUM_RECORDS = 12000

# CSV file read by runHotgym; the first column of every data row is parsed
# as a float.
_INPUT_FILE_PATH = "data.csv"

def runHotgym(numRecords):
    """Run encoder -> spatial pooler -> temporal memory -> classifier on the CSV.

    The first column of every data row in ``_INPUT_FILE_PATH`` is parsed as a
    float "acceleration" value. For each record the value is encoded, pooled,
    fed to the temporal memory, scored for anomaly against the previous
    step's predictive cells, and classified to obtain a 1-step prediction.

    Side effects:
        * Rebinds the module-level ``tp_time`` to a new list and appends, per
          record, the elapsed time of the anomaly + temporal-memory stage.
        * Prints the record counter and per-stage timings for every record.

    Args:
        numRecords: Maximum number of data records to process.

    Returns:
        A ``(results, anomalyScore)`` tuple.
        ``results`` holds one ``[prediction, confidencePercent, None, None]``
        entry per processed record.
        ``anomalyScore`` holds one value per processed record starting with
        the third one (``count == 2``), so it is two entries shorter than
        ``results``.
    """
    global tp_time
    tp_time = []

    modelParams = MODEL_PARAMS["modelParams"]
    accelParams = modelParams["sensorParams"]["encoders"]["acceleration"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    scalarEncoder = AdaptiveScalarEncoder(accelParams["w"], n=accelParams["n"])
    encodingWidth = scalarEncoder.getWidth()
    columnCount = spParams["columnCount"]

    # Dimensions are passed as one-element tuples; "(x)" without the trailing
    # comma is just a parenthesised int.
    # spParams["localAreaDensity"] is deliberately not forwarded, so the
    # pooler keeps its own default for that argument.
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(columnCount,),
        potentialPct=spParams["potentialPct"],
        globalInhibition=spParams["globalInhibition"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=False
    )

    # NOTE(review): connectedPermanence is taken from the spatial pooler
    # params, not from tmParams -- confirm this is intended.
    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    an = Anomaly(slidingWindowSize=1, mode='likelihood')

    classifier = SDRClassifierFactory.create()

    results = []
    anomalyScore = []
    # Predictive cells of the previous record; set at the end of each
    # iteration and consumed by the anomaly computation of the next one.
    predictiveCells = None

    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # The first three rows of the file are not data and are skipped.
        for _ in range(3):
            next(reader)

        for count, record in enumerate(reader):
            if count >= numRecords:
                print("Finished")
                break

            start = time.time()
            print(count)

            # Convert input data value string into float.
            acceleration = float(record[0])

            # The encoder fills a caller-provided zeroed array with the bit
            # representation of the value; with a single encoder this array
            # is the whole input to the spatial pooler.
            encoding = numpy.zeros(encodingWidth)
            scalarEncoder.encodeIntoArray(acceleration, encoding)

            print("ENC time:{0}[sec]".format(time.time() - start))
            start = time.time()

            # Output buffer for the spatial pooler; must have one entry per
            # column.
            activeColumns = numpy.zeros(columnCount)

            # Execute Spatial Pooling algorithm over input space (learning
            # enabled).
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            print("SP time:{0}[sec]".format(time.time() - start))
            start = time.time()

            # Scoring starts at the third record. Downstream plotting relies
            # on anomalyScore being two entries shorter than results.
            if count > 1:
                # Map each predictive cell index to its column index.
                prevPredictedColumns = (
                    numpy.array(predictiveCells) // tm.cellsPerColumn
                )
                anomalyScore.append(
                    an.compute(
                        activeColumnIndices,
                        prevPredictedColumns,
                        inputValue=acceleration,
                        timestamp=None
                    )
                )

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)
            print("No of active segments:{0}".format(
                len(tm.getActiveSegments())))

            # Measure once so the printed and the recorded value agree.
            tmElapsed = time.time() - start
            print("TM time:{0}[sec]".format(tmElapsed))
            tp_time.append(tmElapsed)
            start = time.time()

            activeCells = tm.getActiveCells()
            predictiveCells = tm.getPredictiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(acceleration)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": acceleration
                },
                learn=True,
                infer=True
            )

            print("Classifier time:{0}[sec]".format(time.time() - start))
            start = time.time()

            # Keep the most likely 1-step-ahead value and its confidence.
            oneStepConfidence, oneStep = max(
                zip(classifierResult[1], classifierResult["actualValues"])
            )
            results.append([oneStep, oneStepConfidence * 100, None, None])

            print("Store data time:{0}[sec]".format(time.time() - start))

    return results, anomalyScore

# Script entry point: process up to _NUM_RECORDS records and keep the returned
# lists bound at module level.
if __name__ == "__main__":

    results, anomalyScore = runHotgym(_NUM_RECORDS)

# In [None]:
import pickle

from bokeh.plotting import figure
from bokeh.io import output_file, show, output_notebook

# Send bokeh output to the notebook.
output_notebook()

# Persist the timings collected in the module-level tp_time list, then read
# them straight back so the plot below is drawn from the dumped data.
with open('tp_time.dump', 'wb') as dumpFile:
    pickle.dump(tp_time, dumpFile)

with open('tp_time.dump', 'rb') as dumpFile:
    tp_time = pickle.load(dumpFile)

# x axis: index of each recorded timing.
t = range(len(tp_time))

pp = figure(
    tools='xwheel_zoom,xpan',
    title="",
    x_axis_label='n',
    y_axis_label='time[sec]'
)
pp.line(
    t,
    tp_time,
    legend="TP time",
    line_width=1,
    line_color="blue"
)
show(pp)

# In [None]:
import pickle
import csv

from bokeh.plotting import figure
from bokeh.io import output_file, show, output_notebook

_INPUT_FILE_PATH = "data.csv"

# Anomaly scores are multiplied by this factor before plotting so they share
# one y axis with the acceleration values.
_ANOMALY_PLOT_SCALE = 400

output_notebook()

# NOTE: pickle.load can execute arbitrary code; only load dump files that you
# produced yourself.
with open('results.dump', 'rb') as f:
    results = pickle.load(f)

with open('anomalyScore.dump', 'rb') as f:
    anomalyScore = pickle.load(f)

# First element of every result row (the predicted value).
predicted = [row[0] for row in results]
anomaly = [score * _ANOMALY_PLOT_SCALE for score in anomalyScore]

with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three leading non-data rows, the same rows runHotgym skips.
    next(reader)
    next(reader)
    next(reader)
    actual = [float(record[0]) for record in reader]

# runHotgym produces no anomaly score for its first records, so anomalyScore
# is shorter than results; the difference is how many leading records have no
# score. Shifting `actual` by it plots every score against the record it was
# computed for.
# NOTE(review): assumes both dumps come from the same runHotgym run -- confirm.
scoreOffset = max(len(results) - len(anomalyScore), 0)

# All plotted series must have the same length; derive it from the data
# instead of hard-coding the record count.
numPoints = max(min(len(anomaly), len(actual) - scoreOffset), 0)
t = range(numPoints)

pp = figure(tools='xwheel_zoom,xpan',
            title="",
            x_axis_label='Time[sec]',
            y_axis_label='Acceleration')
pp.line(t, actual[scoreOffset:scoreOffset + numPoints],
        legend="Acceleration", line_width=1, line_color="blue")
# `predicted` is kept available for an optional extra pp.line(...) overlay.
pp.line(t, anomaly[:numPoints],
        legend="Anomaly score", line_width=1, line_color="red")
show(pp)