In [None]:
from htm.bindings.sdr import SDR
from htm.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
import numpy as np
import pandas as pd
import pathlib
import datetime
import csv
from datetime import datetime
import os
from htm.encoders.rdse import RDSE, RDSE_Parameters
import time
import traceback
from sklearn.metrics import accuracy_score
from tqdm import tqdm


In [2]:
inputSources = [
   "monthly_sp500_pca.csv",
   "weekly_dow_jones.csv",
   "weekly_nasdaq.csv",
   "weekly_sp500.csv",
   "monthly_vix_close.csv",
   "monthly_vix_high.csv",
   "monthly_vix_low.csv",
   "monthly_vix_open.csv",
   "daily_natural_gas.csv",
   "daily_oil_prices.csv",
   "value1_vix_close.csv",
   "value1_vix_high.csv",
   "value1_vix_low.csv",
   "value1_vix_open.csv",
   "monthly_gold_prices.csv"
]

In [3]:

config = {
    'enc': {
        "value" :
            {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
        "time": 
            {'timeOfDay': (30, 1), 'weekend': 21}
    },
    'sp': {
        'inputDimensions': None,
        'columnDimensions': (1638,),
        'potentialPct': 0.85,
        'potentialRadius': None,
        'globalInhibition': True,
        'localAreaDensity': 0.04395604395604396,
        'synPermInactiveDec': 0.006,
        'synPermActiveInc': 0.04,
        'synPermConnected': 0.13999999999999999,
        'boostStrength': 3.0,
        'wrapAround': True,
        'seed': 1,
        'learn': False,
    },
    'tm': {
        'cellsPerColumn': 13,
        'activationThreshold': 17,
        'initialPermanence': 0.21,
        'minThreshold': 10,
        'maxNewSynapseCount': 32,
        'permanenceIncrement': 0.1,
        'permanenceDecrement': 0.1,
        'predictedSegmentDecrement': 0.0,
        'maxSegmentsPerCell': 128,
        'maxSynapsesPerSegment': 64,
        'learn': True
    },
    'anomaly': {'period': 1000},
    'learnRows': 100,
    'reflexSize': 2048,
    'accuracyThreshold': 0.5,
    'controlThreshold': 4
}


In [4]:
class ReflexiveMemory:
  def __init__(self, reflexSize, dimensions_dense, dimensions_sparse_sp):
    self.acKey0 = None
    self.pairs = {}
    self.tableSize  = reflexSize
    self.dimensions_dense = dimensions_dense
    self.dimensions_sparse_sp = dimensions_sparse_sp

  def add(self, denseColumns):
    acKey1 = '-'.join(map(str, denseColumns.sparse))
    if(self.acKey0 != None):

      sequence = self.pairs.get(self.acKey0, {})
      sequence_data = sequence.get(acKey1, {
         "count": 0,
         "time": datetime.now()
      })
      if sequence_data["count"] < 256:
        sequence_data["count"] = sequence_data["count"] + 1
      sequence_data["time"] = datetime.now()

      if self.pairs.get(self.acKey0, None) is None:
        self.pairs[self.acKey0] = { acKey1: sequence_data }
      else:
        self.pairs[self.acKey0][acKey1] = sequence_data
        
      table_entries = 0
      oldKey1 = None
      oldKey2 = None
      oldTime = datetime.now()
      for key1, value1 in self.pairs.items():
        table_entries = table_entries + len(value1.items())
        for key2, value2 in value1.items():
          if value2['time'] < oldTime:
            oldKey1 = key1
            oldKey2 = key2
            oldTime = value2['time']
      if table_entries > self.tableSize:
        del self.pairs[oldKey1][oldKey2]
        if len(self.pairs[oldKey1].items()) == 0:
          del self.pairs[oldKey1]

    self.acKey0 = acKey1

  def predict(self, denseColumns):
    return_count = 0
    return_sdr = None

    acKey = '-'.join(map(str, denseColumns.sparse))
    sequences = self.pairs.get(acKey, {})
    for sequence_key, sequence_data in sequences.items():
      if sequence_data["count"] > return_count:
        return_count = sequence_data["count"]
        return_sdr = sequence_key

    if return_sdr is not None:
      tmp_sdr = SDR( self.dimensions_dense )
      tmp_sdr.sparse = list(map(int, return_sdr.split('-')))
      return_sdr = tmp_sdr
    else:
      return_count = None

    return return_count, return_sdr


In [5]:
class ControlHistory:
  def __init__(self):
    self.anomalyRM = []
    self.anomalyTM = []
    self.anomalyNU = []
    self.historyRM = []
    self.historyTM = []
    self.historyGT = []

  def anomalyScore(self, y, x):
      if np.count_nonzero(y) != 0:
          return 1 - np.count_nonzero((x & y)) / np.count_nonzero(y)
      return 1

  def add(self, denseColumns1, sp, tm, rm):

    if rm.acKey0 is not None:

      denseColumns0 = SDR( rm.dimensions_dense )
      denseColumns0.sparse = list(map(int, rm.acKey0.split('-')))

      tm.activateDendrites(True)
      predictiveCells = tm.getPredictiveCells()

      predictiveColumns = SDR( rm.dimensions_sparse_sp )
      predictiveColumns.sparse = list(set(sorted(list(np.where(predictiveCells.dense == 1)[0]))))

      reflexiveColumns = SDR( rm.dimensions_sparse_sp )
      reflexiveCount, denseReflexiveColumns = rm.predict(denseColumns0)
      if denseReflexiveColumns is not None:
        sp.compute(denseReflexiveColumns, False, reflexiveColumns)

      activeColumns0 = SDR( rm.dimensions_sparse_sp )
      sp.compute(denseColumns0, False, activeColumns0)

      activeColumns1 = SDR( rm.dimensions_sparse_sp )
      sp.compute(denseColumns1, False, activeColumns1)

      self.historyRM.append( reflexiveColumns.dense )
      self.historyTM.append( predictiveColumns.dense )
      self.historyGT.append( activeColumns1.dense )

      self.anomalyNU.append(tm.anomaly)
      self.anomalyRM.append( self.anomalyScore(activeColumns1.dense, reflexiveColumns.dense) )
      self.anomalyTM.append( self.anomalyScore(activeColumns1.dense, predictiveColumns.dense) )


In [6]:
class ControlUnit:
  def __init__(self, controlThreshold):
    self.anomalyCU = []
    self.historyCU = []
    self.countRMCU = 0
    self.controlThreshold = controlThreshold

  def compute(self, ch, rm):

    if rm.acKey0 is not None:

      if (len(ch.anomalyRM) > self.controlThreshold) and (sum(ch.anomalyRM[(-1-self.controlThreshold):-1]) > sum(ch.anomalyTM[-1-self.controlThreshold:-1])):
        self.anomalyCU.append( ch.anomalyTM[-1] )
        self.historyCU.append( ch.historyTM[-1] )
        self.countRMCU = self.countRMCU + 1
      else:
        self.anomalyCU.append( ch.anomalyRM[-1] )
        self.historyCU.append( ch.historyRM[-1] )


In [7]:
input_path = pathlib.Path('../datasets/numenta')
dataset_metrics = []

pbar = tqdm(total=len(inputSources))
for dataset in inputSources:

    tm_infer_tm = 0
    tm_infer_rm = 0

    records = []
    with open(input_path.joinpath(dataset), "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)
        
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = config["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = config["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = config["enc"]["value"]["resolution"]
    scalarEncoder = RDSE( scalarEncoderParams )
    encodingWidth = (scalarEncoder.size)

    config['sp']['inputDimensions'] = (encodingWidth,)
    config['sp']['potentialRadius'] = encodingWidth

    sp = SpatialPooler(
        inputDimensions = config['sp']['inputDimensions'],
        columnDimensions = config['sp']['columnDimensions'],
        potentialPct = config['sp']['potentialPct'],
        potentialRadius = config['sp']['potentialRadius'],
        globalInhibition = config['sp']['globalInhibition'],
        localAreaDensity = config['sp']['localAreaDensity'],
        synPermInactiveDec = config['sp']['synPermInactiveDec'],
        synPermActiveInc = config['sp']['synPermActiveInc'],
        synPermConnected = config['sp']['synPermConnected'],
        boostStrength = config['sp']['boostStrength'],
        wrapAround = config['sp']['wrapAround'],
        seed = config['sp']['seed']
    )

    tm = TemporalMemory(
        columnDimensions = config['sp']['columnDimensions'],
        cellsPerColumn = config['tm']['cellsPerColumn'],
        activationThreshold = config['tm']['activationThreshold'],
        initialPermanence = config['tm']['initialPermanence'],
        connectedPermanence = config['sp']['synPermConnected'],
        minThreshold = config['tm']['minThreshold'],
        maxNewSynapseCount = config['tm']['maxNewSynapseCount'],
        permanenceIncrement = config['tm']['permanenceIncrement'],
        permanenceDecrement = config['tm']['permanenceDecrement'],
        predictedSegmentDecrement = config['tm']['predictedSegmentDecrement'],
        maxSegmentsPerCell = config['tm']['maxSegmentsPerCell'],
        maxSynapsesPerSegment = config['tm']['maxSynapsesPerSegment']
    )

    rm = ReflexiveMemory( config['reflexSize'] , config['sp']['inputDimensions'], config['sp']['columnDimensions'])

    ch = ControlHistory()
    cu01 = ControlUnit( 1 )
    cu02 = ControlUnit( 2 )
    cu04 = ControlUnit( 4 )
    cu08 = ControlUnit( 8 )
    cu16 = ControlUnit( 16 )

    try:
        
        for count, record in enumerate(records):
            
            learn_sp = config['sp']['learn']
            learn_tm = config['tm']['learn']
            if count < config['learnRows']:
                learn_sp = True
                learn_tm = True

            consumption = float(record[1])
            consumptionBits = scalarEncoder.encode(consumption)
            encoding = SDR( consumptionBits )

            ch.add(encoding, sp, tm, rm)
            cu01.compute(ch, rm)
            cu02.compute(ch, rm)
            cu04.compute(ch, rm)
            cu08.compute(ch, rm)
            cu16.compute(ch, rm)

            tmp_tm = time.time()
            rm.add(encoding)
            rm.predict(encoding)
            tm_infer_rm = tm_infer_rm + (time.time() - tmp_tm)

            activeColumns = SDR( sp.getColumnDimensions() )

            tmp_tm = time.time()
            sp.compute(encoding, learn_sp, activeColumns)
            tm.compute(activeColumns, learn=learn_tm)
            tm_infer_tm = tm_infer_tm + (time.time() - tmp_tm)

    except Exception as e:
        print(traceback.format_exc())
        print(e)

    def match(y, x, idx1, ch, accuracyThreshold):
        n_samples = len(y)
        score1 = ch.anomalyScore(y[idx1], x[idx1])
        if score1 > (1 - accuracyThreshold):
            idx_closest = None
            score_closest = None
            for idx2 in range(n_samples):
                score2 = ch.anomalyScore(y[idx2], x[idx1])
                if score_closest is None or score_closest > score2:
                    score_closest = score2
                    idx_closest = idx2
            return idx_closest
        return idx1

    def calculateMetrics(config, cu, ch, tm_infer_rm, tm_infer_tm, suffix):

        metric = {}

        n_samples = len(ch.historyGT)

        Y_labels = list(range(n_samples))
        X_labels = [ match(ch.historyGT, cu.historyCU, idx, ch, config['accuracyThreshold']) for idx in range(n_samples)]

        total_infe_time = 0
        avg_infer_time_rm = tm_infer_rm / len(ch.anomalyRM)
        avg_infer_time_tm = tm_infer_tm / len(ch.anomalyTM)
        total_infe_time = total_infe_time + (avg_infer_time_rm * cu.countRMCU)
        total_infe_time = total_infe_time + (avg_infer_time_tm * (len(cu.anomalyCU) - cu.countRMCU))

        metric['rm-count-'+suffix] = cu.countRMCU
        metric['anomaly-samples-'+suffix] = len(cu.historyCU)
        metric['tm-count-'+suffix] = len(cu.historyCU) - cu.countRMCU

        metric['total-infer-time-'+suffix] = total_infe_time
        metric['infer-time-'+suffix] = total_infe_time / len(cu.anomalyCU)
        metric['accuracy-'+suffix] = accuracy_score(Y_labels, X_labels)

        X_labels = [ match(ch.historyGT, ch.historyTM, idx, ch, config['accuracyThreshold']) for idx in range(n_samples)]
        metric['accuracy-improvement-'+suffix] = metric['accuracy-'+suffix] - accuracy_score(Y_labels, X_labels)
        metric['infer-speedup-'+suffix] = 1 - (total_infe_time / tm_infer_tm)

        return metric

    metric = {}
    metric['dataset'] = dataset
    metric.update( calculateMetrics(config, cu01, ch, tm_infer_rm, tm_infer_tm, 'cu01') )
    metric.update( calculateMetrics(config, cu02, ch, tm_infer_rm, tm_infer_tm, 'cu02') )
    metric.update( calculateMetrics(config, cu04, ch, tm_infer_rm, tm_infer_tm, 'cu04') )
    metric.update( calculateMetrics(config, cu08, ch, tm_infer_rm, tm_infer_tm, 'cu08') )
    metric.update( calculateMetrics(config, cu16, ch, tm_infer_rm, tm_infer_tm, 'cu16') )

    dataset_metrics.append(metric)
    pbar.update(1)
    # break

pbar.close()

100%|██████████| 15/15 [05:05<00:00, 20.34s/it]


In [8]:
df = pd.DataFrame(dataset_metrics)
df.to_csv('metrics.csv', index=False)

In [9]:
table_1_features = ['dataset','accuracy-cu01','accuracy-cu02','accuracy-cu04','accuracy-cu08','accuracy-cu16',]
df[table_1_features]

Unnamed: 0,dataset,accuracy-cu01,accuracy-cu02,accuracy-cu04,accuracy-cu08,accuracy-cu16
0,monthly_sp500_pca.csv,0.364412,0.387569,0.402803,0.407069,0.406459
1,weekly_dow_jones.csv,0.027417,0.032708,0.035594,0.036556,0.036556
2,weekly_nasdaq.csv,0.180288,0.197115,0.202885,0.203365,0.202885
3,weekly_sp500.csv,0.323883,0.34839,0.356079,0.35752,0.358001
4,monthly_vix_close.csv,0.879506,0.933086,0.961235,0.966173,0.966667
5,monthly_vix_high.csv,0.893086,0.934074,0.952593,0.955556,0.95679
6,monthly_vix_low.csv,0.930123,0.960741,0.97358,0.97679,0.977284
7,monthly_vix_open.csv,0.892346,0.937284,0.960741,0.966914,0.966914
8,daily_natural_gas.csv,0.971024,0.975164,0.981718,0.986202,0.990341
9,daily_oil_prices.csv,0.978193,0.986747,0.991566,0.994337,0.995181


In [None]:
table_2_features = ['dataset','total-infer-time-cu01','total-infer-time-cu02','total-infer-time-cu04','total-infer-time-cu08','total-infer-time-cu16','infer-time-cu01','infer-time-cu02','infer-time-cu04','infer-time-cu08','infer-time-cu16']
df[table_2_features]

Unnamed: 0,dataset,total-infer-time-cu01,total-infer-time-cu02,total-infer-time-cu04,total-infer-time-cu08,total-infer-time-cu16,infer-time-cu01,infer-time-cu02,infer-time-cu04,infer-time-cu08,infer-time-cu16
0,monthly_sp500_pca.csv,0.231292,0.171732,0.124914,0.102986,0.099727,0.000141,0.000105,7.6e-05,6.3e-05,6.1e-05
1,weekly_dow_jones.csv,0.502255,0.415861,0.336041,0.276176,0.229457,0.000242,0.0002,0.000162,0.000133,0.00011
2,weekly_nasdaq.csv,0.401555,0.301232,0.239343,0.209387,0.200909,0.000193,0.000145,0.000115,0.000101,9.7e-05
3,weekly_sp500.csv,0.292983,0.219288,0.177327,0.164738,0.161854,0.000141,0.000105,8.5e-05,7.9e-05,7.8e-05
4,monthly_vix_close.csv,0.581509,0.327439,0.175248,0.133112,0.131225,0.000144,8.1e-05,4.3e-05,3.3e-05,3.2e-05
5,monthly_vix_high.csv,0.541906,0.315887,0.183086,0.143217,0.132605,0.000134,7.8e-05,4.5e-05,3.5e-05,3.3e-05
6,monthly_vix_low.csv,0.393566,0.249005,0.157485,0.122645,0.117272,9.7e-05,6.1e-05,3.9e-05,3e-05,2.9e-05
7,monthly_vix_open.csv,0.489882,0.30449,0.185426,0.137145,0.128473,0.000121,7.5e-05,4.6e-05,3.4e-05,3.2e-05
8,daily_natural_gas.csv,0.329058,0.304879,0.270991,0.231382,0.191101,5.7e-05,5.3e-05,4.7e-05,4e-05,3.3e-05
9,daily_oil_prices.csv,0.579075,0.453143,0.356226,0.297853,0.271929,7e-05,5.5e-05,4.3e-05,3.6e-05,3.3e-05


In [None]:
table_3_features = ['dataset','accuracy-improvement-cu01','accuracy-improvement-cu02','accuracy-improvement-cu04','accuracy-improvement-cu08','accuracy-improvement-cu16','infer-speedup-cu01','infer-speedup-cu02','infer-speedup-cu04','infer-speedup-cu08','infer-speedup-cu16']
df[table_3_features]

Unnamed: 0,dataset,accuracy-improvement-cu01,accuracy-improvement-cu02,accuracy-improvement-cu04,accuracy-improvement-cu08,accuracy-improvement-cu16,infer-speedup-cu01,infer-speedup-cu02,infer-speedup-cu04,infer-speedup-cu08,infer-speedup-cu16
0,monthly_sp500_pca.csv,-0.042048,-0.018891,-0.003656,0.000609,0.0,0.598802,0.702114,0.783325,0.821361,0.827015
1,weekly_dow_jones.csv,-0.009139,-0.003848,-0.000962,0.0,0.0,0.270676,0.396129,0.512035,0.598965,0.666805
2,weekly_nasdaq.csv,-0.024519,-0.007692,-0.001923,-0.001442,-0.001923,0.47988,0.609825,0.689988,0.728788,0.739769
3,weekly_sp500.csv,-0.034599,-0.010091,-0.002403,-0.000961,-0.000481,0.582865,0.687788,0.74753,0.765453,0.76956
4,monthly_vix_close.csv,-0.087654,-0.034074,-0.005926,-0.000988,-0.000494,0.580112,0.763568,0.87346,0.903884,0.905247
5,monthly_vix_high.csv,-0.064198,-0.02321,-0.004691,-0.001728,-0.000494,0.576131,0.752919,0.856793,0.887978,0.896279
6,monthly_vix_low.csv,-0.047407,-0.01679,-0.003951,-0.000741,-0.000247,0.512709,0.691695,0.80501,0.848147,0.8548
7,monthly_vix_open.csv,-0.075062,-0.030123,-0.006667,-0.000494,-0.000494,0.538512,0.713159,0.825321,0.870805,0.878974
8,daily_natural_gas.csv,-0.024491,-0.020352,-0.013798,-0.009314,-0.005174,0.104518,0.170317,0.262539,0.370327,0.479947
9,daily_oil_prices.csv,-0.016627,-0.008072,-0.003253,-0.000482,0.000361,0.403282,0.53305,0.63292,0.693072,0.719786


In [None]:
table_4_features = ['dataset'] + sorted(list(set(df.columns) - (set(table_1_features) | set(table_2_features) | set(table_3_features))))
df[table_4_features]

Unnamed: 0,dataset,anomaly-samples-cu01,anomaly-samples-cu02,anomaly-samples-cu04,anomaly-samples-cu08,anomaly-samples-cu16,rm-count-cu01,rm-count-cu02,rm-count-cu04,rm-count-cu08,rm-count-cu16,tm-count-cu01,tm-count-cu02,tm-count-cu04,tm-count-cu08,tm-count-cu16
0,monthly_sp500_pca.csv,1641,1641,1641,1641,1641,1165,1366,1524,1598,1609,476,275,117,43,32
1,weekly_dow_jones.csv,2079,2079,2079,2079,2079,794,1162,1502,1757,1956,1285,917,577,322,123
2,weekly_nasdaq.csv,2080,2080,2080,2080,2080,1311,1666,1885,1991,2021,769,414,195,89,59
3,weekly_sp500.csv,2081,2081,2081,2081,2081,1561,1842,2002,2050,2061,520,239,79,31,20
4,monthly_vix_close.csv,4050,4050,4050,4050,4050,2555,3363,3847,3981,3987,1495,687,203,69,63
5,monthly_vix_high.csv,4050,4050,4050,4050,4050,2568,3356,3819,3958,3995,1482,694,231,92,55
6,monthly_vix_low.csv,4050,4050,4050,4050,4050,2389,3223,3751,3952,3983,1661,827,299,98,67
7,monthly_vix_open.csv,4050,4050,4050,4050,4050,2439,3230,3738,3944,3981,1611,820,312,106,69
8,daily_natural_gas.csv,5798,5798,5798,5798,5798,799,1302,2007,2831,3669,4999,4496,3791,2967,2129
9,daily_oil_prices.csv,8300,8300,8300,8300,8300,4559,6026,7155,7835,8137,3741,2274,1145,465,163
