In [1]:
from htm.bindings.sdr import SDR, Metrics
# from htm.encoders.scalar_encoder import ScalarEncoder, ScalarEncoderParameters
from htm.encoders.date import DateEncoder
from htm.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
import numpy as np
import pandas as pd
import pathlib
import datetime
import csv
from tqdm import tqdm
import matplotlib.pyplot as plt
from datetime import datetime
import hashlib
import os
from htm.encoders.rdse import RDSE, RDSE_Parameters
import time
import traceback
import hashlib

In [2]:
class ReflexiveMemory:
  def __init__(self, dimensions):
    self.acKey0 = None
    self.pairs = {}
    self.dimensions = dimensions
    self.anomaly = []

  def add(self, activeColumns):
    acKey1 = '-'.join(map(str, activeColumns.sparse))
    if(self.acKey0 != None):

      sequence = self.pairs.get(self.acKey0, {})
      sequence_data = sequence.get(acKey1, {
         "count": 0,
         "time": datetime.now()
      })
      sequence_data["count"] = sequence_data["count"] + 1
      sequence_data["time"] = datetime.now()

      if self.pairs.get(self.acKey0, None) is None:
        self.pairs[self.acKey0] = { acKey1: sequence_data }
      else:
        self.pairs[self.acKey0][acKey1] = sequence_data

      table_size = 0
      oldKey1 = None
      oldKey2 = None
      oldTime = datetime.now()
      for key1, value1 in self.pairs.items():
        table_size = table_size + len(value1.items())
        for key2, value2 in value1.items():
          if value2['time'] < oldTime:
            oldKey1 = key1
            oldKey2 = key2
            oldTime = datetime.now()
      if table_size > 99:
         del self.pairs[oldKey1][oldKey2]
         if len(self.pairs[oldKey1].items()) == 0:
            del self.pairs[oldKey1]

    self.acKey0 = acKey1

  def predict(self, activeColumns):
    return_count = 0
    return_sdr = None

    acKey = '-'.join(map(str, activeColumns.sparse))
    sequences = self.pairs.get(acKey, {})
    for sequence_key, sequence_data in sequences.items():
      if sequence_data["count"] > return_count:
        return_count = sequence_data["count"]
        return_sdr = sequence_key

    if return_sdr is not None:
      tmp_sdr = SDR( self.dimensions )
      tmp_sdr.sparse = list(map(int, return_sdr.split('-')))
      return_sdr = tmp_sdr

    return return_count, return_sdr

  # Control Unit
  def learn(self, activeColumns1, tm):
    pred_correct = False
    pred_anomaly = None

    if(self.acKey0 is not None):

        activeColumns0 = SDR( self.dimensions )
        activeColumns0.sparse = list(map(int, self.acKey0.split('-')))

        tm.activateDendrites(True)
        predictiveColumns = SDR( self.dimensions )
        predictiveColumns.sparse = list(set(sorted(list(np.where(tm.getPredictiveCells().dense == 1)[0]))))

        reflexiveCount, reflexiveColumns = self.predict(activeColumns0)
        if reflexiveColumns is not None:
            
            pred_anomaly = 1 - np.count_nonzero((reflexiveColumns.dense & activeColumns1.dense)) / np.count_nonzero(activeColumns1.dense)
            
            # RM-1 SM-?
            if activeColumns1.flatten() == reflexiveColumns.flatten():
                pred_correct = True
                pred_anomaly = 0

            # RM-0 SM-1
            elif activeColumns1.flatten() == predictiveColumns.flatten():
                key1 = self.acKey0
                key2 = '-'.join(map(str, reflexiveColumns.sparse))
                self.pairs[key1][key2] = reflexiveCount - 5

                key2 = '-'.join(map(str, predictiveColumns.sparse))
                key2_data = self.pairs.get(key1, {}).get(key2, {
                  "count": 0,
                  "time": datetime.now()
                })
                key2_data = key2_data["count"] + 1
                self.pairs[key1][key2] = key2_data

            # RM-0 SM-0
            else:
                key1 = self.acKey0
                key2 = '-'.join(map(str, reflexiveColumns.sparse))
                self.pairs[key1][key2]["count"] = reflexiveCount - 1
                
    self.anomaly.append( pred_anomaly )

  def compute(self, activeColumns, tm):
    self.learn(activeColumns, tm)
    self.add(activeColumns)


  def save_to_csv(self, dataset_name, save_dir='./saved_reflex_data/'):
      # Ensure the directory exists
      os.makedirs(save_dir, exist_ok=True)
      
      # Create a filename based on the dataset name, in the specified directory
      filename = os.path.join(save_dir, f"{dataset_name}_reflex_memory.csv")
      
      # Save self.pairs to a CSV file
      with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Key (1024 bits)', 'Values (1024 bits)'])
          
          # Write each key (as 1024 bits) and all corresponding values (also 1024 bits each)
        for key1, value1 in rm.pairs.items():
          for key2, value2 in value1.items():
            result = hashlib.md5(key1.encode())
            print(result.hexdigest(), end=' ')
            result = hashlib.md5(key2.encode())
            print(result.hexdigest(), end=' ')
            print(value2["count"], end=' ')
            print(value2["time"].timestamp())


            

In [3]:
inputSources = [
    # "hourly_numentaTM_speed_7578.csv",
    # "hourly_numentaTM_iio_us-east-1_i-a2eb1cd9_NetworkIn.csv",
    # "hourly_numentaTM_exchange-3_cpc_results.csv",
    # "hourly_numentaTM_exchange-3_cpm_results.csv",
    # "hourly_numentaTM_exchange-2_cpc_results.csv",
    # "hourly_numentaTM_exchange-2_cpm_results.csv",
    # "hourly_numentaTM_exchange-4_cpc_results.csv",
    # "hourly_numentaTM_exchange-4_cpm_results.csv",
    # "hourly_numentaTM_rogue_agent_key_hold.csv",
    # "hourly_numentaTM_TravelTime_451.csv",
    # "hourly_numentaTM_occupancy_6005.csv",
    # "hourly_numentaTM_speed_t4013.csv",
    # "hourly_numentaTM_TravelTime_387.csv",
    # "hourly_numentaTM_occupancy_t4013.csv",
    # "hourly_numentaTM_speed_6005.csv",
    # "hourly_numentaTM_art_daily_flatmiddle.csv",
    # "hourly_numentaTM_art_daily_jumpsdown.csv",
    # "hourly_numentaTM_art_daily_jumpsup.csv",
    # "hourly_numentaTM_art_daily_no_noise.csv",
    # "hourly_numentaTM_art_daily_nojump.csv",
    # "hourly_numentaTM_art_daily_perfect_square_wave.csv",
    # "hourly_numentaTM_art_daily_small_noise.csv",
    # "hourly_numentaTM_art_flatline.csv",
    # "hourly_numentaTM_art_increase_spike_density.csv",
    # "hourly_numentaTM_art_load_balancer_spikes.csv",
    # "hourly_numentaTM_art_noisy.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_24ae8d.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_53ea38.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_5f5533.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_77c1ca.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_825cc2.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_ac20cd.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_c6585a.csv",
    # "hourly_numentaTM_ec2_cpu_utilization_fe7f93.csv",
    # "hourly_numentaTM_ec2_disk_write_bytes_c0d644.csv",
    # "hourly_numentaTM_ec2_network_in_257a54.csv",
    # "hourly_numentaTM_ec2_request_latency_system_failure.csv",
    # "hourly_numentaTM_elb_request_count_8c0756.csv",
    # "hourly_numentaTM_rds_cpu_utilization_cc0c53.csv",
    # "hourly_numentaTM_rds_cpu_utilization_e47b3b.csv",
    # "hourly_numentaTM_grok_asg_anomaly.csv",
    # "hourly_numentaTM_ec2_disk_write_bytes_1ef3de.csv",
    # "hourly_numentaTM_ec2_network_in_5abac7.csv",
    # "hourly_numentaTM_rogue_agent_key_updown.csv",
    # "hourly_numentaTM_ambient_temperature_system_failure.csv",
    # "hourly_numentaTM_nyc_taxi.csv",
    # "hourly_numentaTM_Twitter_volume_AMZN.csv",
    # "hourly_numentaTM_Twitter_volume_FB.csv",
    # "hourly_numentaTM_Twitter_volume_GOOG.csv",
    # "hourly_numentaTM_Twitter_volume_KO.csv",
    # "hourly_numentaTM_Twitter_volume_CVS.csv",
    # "hourly_numentaTM_Twitter_volume_PFE.csv",
    # "hourly_numentaTM_Twitter_volume_UPS.csv",
    # "hourly_numentaTM_Twitter_volume_IBM.csv",
    # "hourly_numentaTM_Twitter_volume_AAPL.csv",
    # "hourly_numentaTM_Twitter_volume_CRM.csv",
    # "hourly_numentaTM_cpu_utilization_asg_misconfiguration.csv",
    # "hourly_numentaTM_machine_temperature_system_failure.csv",


#    "value1_pseudo_periodic_synthetic_1.csv",
#    "value1_pseudo_periodic_synthetic_2.csv",
#    "value1_pseudo_periodic_synthetic_3.csv",
#    "value1_pseudo_periodic_synthetic_4.csv",
#    "value1_pseudo_periodic_synthetic_5.csv",
#    "value1_pseudo_periodic_synthetic_6.csv",
#    "value1_pseudo_periodic_synthetic_7.csv",
#    "value1_pseudo_periodic_synthetic_8.csv",
#    "value1_pseudo_periodic_synthetic_9.csv",
#    "value1_pseudo_periodic_synthetic_10.csv",
#    "monthly_gold_prices.csv",
   "monthly_sp500.csv",
   "weekly_dow_jones.csv",
   "weekly_nasdaq.csv",
   "weekly_sp500.csv",
   "monthly_vix_close.csv",
   "monthly_vix_high.csv",
   "monthly_vix_low.csv",
   "monthly_vix_open.csv",
   "daily_natural_gas.csv",
   "daily_oil_prices.csv",
   "value1_vix_close.csv",
   "value1_vix_high.csv",
   "value1_vix_low.csv",
   "value1_vix_open.csv"
]

In [4]:

def parse_date(date_str):
    formats = {
        7: "%Y-%m",                    # Format: yyyy-mm
        10: "%Y-%m-%d",                # Format: yyyy-mm-dd
        19: "%Y-%m-%d %H:%M:%S"        # Format: yyyy-mm-dd hh-mm-ss
    }

    date_format = formats.get(len(date_str))

    if date_format:
        # Use `datetime.strptime` directly
        return datetime.strptime(date_str, date_format)
    else:
        raise ValueError(f"Date format not recognized for: {date_str}")

In [5]:

config = {
    'enc': {
        "value" :
            {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
        "time": 
            {'timeOfDay': (30, 1), 'weekend': 21}
    },
    'sp': {
        'inputDimensions': None,
        'columnDimensions': (1638,),
        'potentialPct': 0.85,
        'potentialRadius': None,
        'globalInhibition': True,
        'localAreaDensity': 0.04395604395604396,
        'synPermInactiveDec': 0.006,
        'synPermActiveInc': 0.04,
        'synPermConnected': 0.13999999999999999,
        'boostStrength': 3.0,
        'wrapAround': True,
        'seed': 1,
        'learn': False,
    },
    'tm': {
        'cellsPerColumn': 13,
        'activationThreshold': 17,
        'initialPermanence': 0.21,
        'minThreshold': 10,
        'maxNewSynapseCount': 32,
        'permanenceIncrement': 0.1,
        'permanenceDecrement': 0.1,
        'predictedSegmentDecrement': 0.0,
        'maxSegmentsPerCell': 128,
        'maxSynapsesPerSegment': 64,
        'learn': True
    },
    'anomaly': {'period': 1000},
    'learnRows': 100
}


In [6]:
input_path = pathlib.Path('../datasets/numenta')

pbar = tqdm(total=len(inputSources))
for dataset in inputSources:

    records = []
    with open(input_path.joinpath(dataset), "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)
    
    dateEncoder = DateEncoder(
        timeOfDay= config["enc"]["time"]["timeOfDay"], 
        weekend  = config["enc"]["time"]["weekend"]
    )

    # config["enc"]["value"]["minimum"] = min(float(r[1]) for r in records)
    # config["enc"]["value"]["maximum"] = max(float(r[1]) for r in records)
    
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = config["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = config["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = config["enc"]["value"]["resolution"]
    scalarEncoder = RDSE( scalarEncoderParams )
    # encodingWidth = (dateEncoder.size + scalarEncoder.size)
    encodingWidth = (scalarEncoder.size)

    config['sp']['inputDimensions'] = (encodingWidth,)
    config['sp']['potentialRadius'] = encodingWidth

    sp = SpatialPooler(
        inputDimensions = config['sp']['inputDimensions'],
        columnDimensions = config['sp']['columnDimensions'],
        potentialPct = config['sp']['potentialPct'],
        potentialRadius = config['sp']['potentialRadius'],
        globalInhibition = config['sp']['globalInhibition'],
        localAreaDensity = config['sp']['localAreaDensity'],
        synPermInactiveDec = config['sp']['synPermInactiveDec'],
        synPermActiveInc = config['sp']['synPermActiveInc'],
        synPermConnected = config['sp']['synPermConnected'],
        boostStrength = config['sp']['boostStrength'],
        wrapAround = config['sp']['wrapAround'],
        seed = config['sp']['seed']
    )

    tm = TemporalMemory(
        columnDimensions = config['sp']['columnDimensions'],
        cellsPerColumn = config['tm']['cellsPerColumn'],
        activationThreshold = config['tm']['activationThreshold'],
        initialPermanence = config['tm']['initialPermanence'],
        connectedPermanence = config['sp']['synPermConnected'],
        minThreshold = config['tm']['minThreshold'],
        maxNewSynapseCount = config['tm']['maxNewSynapseCount'],
        permanenceIncrement = config['tm']['permanenceIncrement'],
        permanenceDecrement = config['tm']['permanenceDecrement'],
        predictedSegmentDecrement = config['tm']['predictedSegmentDecrement'],
        maxSegmentsPerCell = config['tm']['maxSegmentsPerCell'],
        maxSynapsesPerSegment = config['tm']['maxSynapsesPerSegment']
    )

    rm = ReflexiveMemory( sp.getColumnDimensions() )

    enc_info = Metrics( [encodingWidth], 999999999)
    sp_info = Metrics( sp.getColumnDimensions(), 999999999 )
    tm_info = Metrics( [tm.numberOfCells()], 999999999 )
    anomaly_history = AnomalyLikelihood(config["anomaly"]["period"])

    inputs = []
    anomaly = []
    anomalyProb = []

    print("\n"+dataset)
    try:

        # Start time for HTM with Reflexive Memory
        start_time_with_rm = time.time()

        for count, record in enumerate(records):

            # dateString = parse_date(record[0])
            consumption = float(record[1])
            inputs.append( consumption )
            
            # dateBits = dateEncoder.encode(dateString)
            consumptionBits = scalarEncoder.encode(consumption)

            # encoding = SDR( encodingWidth ).concatenate([consumptionBits, dateBits])
            encoding = SDR( consumptionBits )
            enc_info.addData( encoding )
            
            activeColumns = SDR( sp.getColumnDimensions() )

            if count < config['learnRows']:

                sp.compute(encoding, True, activeColumns)
                sp_info.addData( activeColumns )

                tm.compute(activeColumns, learn=True)
                tm_info.addData( tm.getActiveCells().flatten() )

            else: 

                sp.compute(encoding, config['sp']['learn'], activeColumns)
                sp_info.addData( activeColumns )

                rm.compute(activeColumns, tm)

                tm.compute(activeColumns, learn=config['tm']['learn'])
                tm_info.addData( tm.getActiveCells().flatten() )

            anomaly.append( tm.anomaly )
            anomalyProb.append( anomaly_history.compute(tm.anomaly) )

        for key1, value1 in rm.pairs.items():
            print("\n"+hashlib.md5(key1.encode()).hexdigest(), len(value1.items()))
            for key2, value2 in value1.items():
                result = hashlib.md5(key1.encode())
                print(result.hexdigest(), end=' ')
                result = hashlib.md5(key2.encode())
                print(result.hexdigest(), end=' ')
                print(value2["count"], end=' ')
                print(value2["time"].timestamp())

        # End time for HTM with Reflexive Memory
        end_time_with_rm = time.time()

        # Time taken with Reflexive Memory
        time_with_rm = end_time_with_rm - start_time_with_rm
        print(f"Time taken with Reflexive Memory: {time_with_rm:.2f} seconds")

        # save the reflex memory table in the external file per dataset
        # Data Set
        # rm.save_to_csv(dataset)

    except Exception as e:
        print(traceback.format_exc())
        print(e)

    pbar.update(1)
    pbar.close()


  0%|          | 0/14 [00:00<?, ?it/s]


monthly_sp500.csv


  7%|▋         | 1/14 [00:01<00:14,  1.10s/it]


31f454675472f8d6ed3edef178c31724 2
31f454675472f8d6ed3edef178c31724 31f454675472f8d6ed3edef178c31724 23 1729043823.222085
31f454675472f8d6ed3edef178c31724 624f0ca3a09c79991c5603296a2b554f 9 1729043823.222275

624f0ca3a09c79991c5603296a2b554f 4
624f0ca3a09c79991c5603296a2b554f 624f0ca3a09c79991c5603296a2b554f 49 1729043823.320446
624f0ca3a09c79991c5603296a2b554f 3c36e05f6a23d1f8f80d9a1d83b2bd49 13 1729043823.225537
624f0ca3a09c79991c5603296a2b554f 31f454675472f8d6ed3edef178c31724 8 1729043823.219935
624f0ca3a09c79991c5603296a2b554f 26935ddcbd0e916613a9109c56bee983 1 1729043823.320881

3c36e05f6a23d1f8f80d9a1d83b2bd49 3
3c36e05f6a23d1f8f80d9a1d83b2bd49 624f0ca3a09c79991c5603296a2b554f 12 1729043823.320041
3c36e05f6a23d1f8f80d9a1d83b2bd49 3c36e05f6a23d1f8f80d9a1d83b2bd49 62 1729043823.230569
3c36e05f6a23d1f8f80d9a1d83b2bd49 535108dc176c934f012512ee58f06dd5 6 1729043823.230779

535108dc176c934f012512ee58f06dd5 4
535108dc176c934f012512ee58f06dd5 535108dc176c934f012512ee58f06dd5 28 17290438





weekly_dow_jones.csv

20cd7bb0c1e0ca1e6417cc5129066537 2
20cd7bb0c1e0ca1e6417cc5129066537 ded3c978fac6d90e3f47349581cb682f 0 1729043824.270967
20cd7bb0c1e0ca1e6417cc5129066537 5c7fa41d58fdb635f17a962d26dcfd50 1 1729043824.349006

ded3c978fac6d90e3f47349581cb682f 1
ded3c978fac6d90e3f47349581cb682f 89af6509dfc12ffbabcb965f0e120f4f 1 1729043824.271513

89af6509dfc12ffbabcb965f0e120f4f 1
89af6509dfc12ffbabcb965f0e120f4f 59a4c24d5476569adfd4483f62bf6b72 1 1729043824.271882

59a4c24d5476569adfd4483f62bf6b72 1
59a4c24d5476569adfd4483f62bf6b72 415d04d3f7c81625662fcc637f9f8390 1 1729043824.272303

415d04d3f7c81625662fcc637f9f8390 2
415d04d3f7c81625662fcc637f9f8390 f50cd71a54c582a03dd30b000ed97230 0 1729043824.272856
415d04d3f7c81625662fcc637f9f8390 ac2f151121b0d868ec969f01f8477d09 1 1729043824.273851

f50cd71a54c582a03dd30b000ed97230 1
f50cd71a54c582a03dd30b000ed97230 415d04d3f7c81625662fcc637f9f8390 1 1729043824.27317

ac2f151121b0d868ec969f01f8477d09 1
ac2f151121b0d868ec969f01f8477d09 918430