# HTM

Importamos librerías y definimos un par de funciones:

In [1]:
import os
import datetime
from nupic.data.file_record_stream import FileRecordStream
from htm_anomaly_detection import HTM
import numpy as np

In [2]:
def count_rows(filepath):
    """Return the number of lines in the text file at ``filepath``.

    Every line is counted, header lines included, as is a final line that
    has no trailing newline. An empty file yields 0.

    Args:
        filepath: Path of the file to read.

    Returns:
        int: Total number of lines in the file.
    """
    with open(filepath, 'r') as f:
        # Stream the file rather than building a list of every line just to
        # take its length.
        return sum(1 for _ in f)

def getRunningTime(filepath, indicator):
    """Parse the timestamp stored in the first column of one line of a CSV file.

    Args:
        filepath: Path of the comma-separated file to read.
        indicator: Zero-based index of the line to read. Negative values
            count from the end of the file, as with list indexing.

    Returns:
        datetime.datetime: The first comma-separated field of that line,
        parsed with the format ``'%Y-%m-%d %H:%M:%S'``.

    Raises:
        IndexError: If the file has no line at ``indicator``.
        ValueError: If the field does not match the timestamp format.
    """
    with open(filepath, 'r') as csvfile:
        if indicator < 0:
            # Counting from the end requires knowing every line.
            targetLine = csvfile.readlines()[indicator]
        else:
            # Stop reading as soon as the requested line is reached instead
            # of loading the whole file on every call.
            for line_number, line in enumerate(csvfile):
                if line_number == indicator:
                    targetLine = line
                    break
            else:
                raise IndexError('list index out of range')
    # Strip surrounding whitespace: when the timestamp is the only column the
    # field still carries the line's newline, which strptime rejects.
    lastime = targetLine.split(',')[0].strip()
    return datetime.datetime.strptime(lastime, '%Y-%m-%d %H:%M:%S')

## Establecemos los parámetros del modelo

In [3]:
# Parameters the user is expected to define:
#   _TIMEOFDAY       - value passed as "timeOfDay" for the 'fecha' encoder below
#                      (presumably (width, radius in hours) -- confirm against
#                      https://nupic.docs.numenta.org/1.0.3/api/algorithms/encoders.html)
#   _STREAM_BUFFER   - data stream buffer: how many rows of data are written
#                      into the csv cache file each time
#   _USE_SAVED_MODEL - whether to use a previously saved model
_TIMEOFDAY = (21, 4)
_STREAM_BUFFER = 60
_USE_SAVED_MODEL = False


# Encoder, SP and TM parameters. Run this cell once to write them to separate
# JSON files under ./temp. Values to adapt for the scalar input:
#   - minval:    the minimum possible value for this input
#   - maxval:    the maximum possible value for this input
#   - name:      the name of this input
#   - clipInput: clip the input if the value exceeds the min/max values
#
# Once the parameters no longer need to change, they can simply be read back
# from the files and the code in this cell can be commented out.
encoder_params = {
    'NIRvsSecadero': {
        "minval": -50,
        "maxval": 50,
        "w": 21,
        "periodic": False,
        "n": 50,
        "radius": 0,
        "resolution": 0,
        "name": "NIRvsSecadero",
        "verbosity": 0,
        "clipInput": False,
        "forced": False,
    },
    'fecha': {
        "season": 0,
        "dayOfWeek": 0,
        "weekend": 0,
        "holiday": 0,
        "timeOfDay": _TIMEOFDAY,
        "customDays": 0,
        "name": "fecha",
        "forced": False,
    },
}

_SP_PARAMS = {
    'SP': {
        "spatialImp": "cpp",
        "globalInhibition": 1,
        "columnCount": 2048,
        "inputWidth": 0,
        "numActiveColumnsPerInhArea": 40,
        "seed": 1956,
        "potentialPct": 0.8,
        "synPermConnected": 0.1,
        "synPermActiveInc": 0.0001,
        "synPermInactiveDec": 0.0005,
        "boostStrength": 0.0,
    },
}

_TM_PARAMS = {
    'TM': {
        "columnCount": 2048,
        "cellsPerColumn": 32,
        "inputWidth": 2048,
        "seed": 1960,
        "temporalImp": "cpp",
        "newSynapseCount": 20,
        "maxSynapsesPerSegment": 32,
        "maxSegmentsPerCell": 128,
        "initialPerm": 0.21,
        "permanenceInc": 0.1,
        "permanenceDec": 0.1,
        "globalDecay": 0.0,
        "maxAge": 0,
        "minThreshold": 9,
        "activationThreshold": 12,
        "outputType": "normal",
        "pamLength": 3,
    },
}


# Initialize the HTM anomaly detection object.
htm = HTM(use_saved_model=_USE_SAVED_MODEL)

# Make sure the output directory exists before anything is written to it.
if not os.path.exists('./temp'):
    os.makedirs('./temp')

# Save the encoder parameters, then the SP and TM parameters (all three go
# through the same setEncoderParams call).
htm.setEncoderParams('./temp/encoders.json', encoder_params)
htm.setEncoderParams('./temp/SP.json', _SP_PARAMS)
htm.setEncoderParams('./temp/TM.json', _TM_PARAMS)

Ahora los cargamos:

In [4]:
# Load the parameter sets back from their JSON files and build the data stream
# reader that feeds the network.
# Open question kept from the original note: the paths might as well be
# hard-coded, since there are many of them for users to define.
NIRvsSecadero = htm.getEncoderParams('./temp/encoders.json', 'NIRvsSecadero')
fecha = htm.getEncoderParams('./temp/encoders.json', 'fecha')
SPArgs = htm.getEncoderParams('./temp/SP.json', 'SP')
TMArgs = htm.getEncoderParams('./temp/TM.json', 'TM')

# Per-field encoder parameters, keyed by field name.
input01_recordParams = {"NIRvsSecadero": NIRvsSecadero, "fecha": fecha}

# Define the data source.
streamReader1 = FileRecordStream(streamID='Datos/Entrada.csv')

Contamos las filas del fichero de entrada; el bucle de predicción hará una iteración por cada fila de datos (todas menos las 3 primeras):

In [5]:
# Total number of lines in the input CSV. count_rows counts every line, so
# this includes the leading lines that the prediction loop skips (it stops at
# iteration + 3 == filas).
filas = count_rows('Datos/Entrada.csv')
# Bare expression: shown as the cell's output.
filas

20003

Creamos la red:

In [6]:
# Build the network from the record stream and the parameter sets loaded from
# the JSON files.
network01 = htm.createNetwork(
    datasource=streamReader1,
    recordParams=input01_recordParams,
    spatialParams=SPArgs,
    temporalParams=TMArgs,
)

Sacamos predicciones:

In [7]:
# State for the iterative prediction loop: the network is run repeatedly,
# each run consuming the next row of data (network.run(1) means "run this
# network once on the next row").
#
# NOTE(review): the original note here said that
# data_simulator.getBatchData2csv had been called in the cell above to
# initialise a first batch of data in a csv cache, and that the loop would
# request new data on reaching the end of the cache. No such call is visible
# in this notebook -- confirm whether that still applies.
#
#   iteration - number of rows processed so far
#   anomalias - anomaly likelihoods collected so far, one per processed row
iteration, anomalias = 0, []
def run_network_once():
    """Run ``network01`` once and record the resulting anomaly likelihood.

    Calls ``htm.run(network01)``, which returns a pair unpacked here as
    (fed-in data, anomaly likelihood), and appends the second element to the
    module-level list ``anomalias``.

    The timestamp lookup that used to follow has been removed. Its result was
    only used by a commented-out debug print, it re-read the whole input CSV
    on every call, and its index ``(iteration % _STREAM_BUFFER) + 3`` could
    only ever address the first ``_STREAM_BUFFER`` data lines of the file.

    Returns:
        None
    """
    _fed_in_data, anomaly_likelihood = htm.run(network01)
    anomalias.append(anomaly_likelihood)

while(iteration+3 < filas):
    run_network_once()
    iteration += 1
    if iteration % 10 == 0: 
        print round(float(iteration)/float(filas)*100,2),"% \r",
'''
Save the model when breaking from while loop.

You should define your button in SENSEI to break this while loop and save the model 
because we usually set looping as while(1) instead of while(iteration < n)
'''
if not os.path.exists('./models'):
    os.makedirs('./models')
    
htm.save_network(network01, './models/network1.nta')

print '\nComplete.'

99.99 %  % 
Complete.


In [8]:
# Write the collected anomaly likelihoods to a comma-delimited text file.
np.savetxt(
    fname="Datos/anomalias.csv",
    X=anomalias,
    delimiter=",",
)