In [1]:
from htm.bindings.sdr import SDR, Metrics
from htm.encoders.date import DateEncoder
from htm.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
import numpy as np
import pandas as pd
import pathlib
import datetime
import csv
from datetime import datetime
import hashlib
import os
from htm.encoders.rdse import RDSE, RDSE_Parameters
import time
import traceback
import hashlib

In [None]:
class ReflexiveMemory:
  def __init__(self, dimensions, reflexSize):
    self.acKey0 = None
    self.pairs = {}
    self.dimensions = dimensions
    self.anomaly = []
    self.enableLearn = False
    self.tableSize  = reflexSize

  def add(self, activeColumns):
    acKey1 = '-'.join(map(str, activeColumns.sparse))
    if(self.acKey0 != None):

      sequence = self.pairs.get(self.acKey0, {})
      sequence_data = sequence.get(acKey1, {
         "count": 0,
         "time": datetime.now()
      })
      sequence_data["count"] = sequence_data["count"] + 1
      sequence_data["time"] = datetime.now()

      if self.pairs.get(self.acKey0, None) is None:
        self.pairs[self.acKey0] = { acKey1: sequence_data }
      else:
        self.pairs[self.acKey0][acKey1] = sequence_data

      table_entries = 0
      oldKey1 = None
      oldKey2 = None
      oldTime = datetime.now()
      for key1, value1 in self.pairs.items():
        table_entries = table_entries + len(value1.items())
        for key2, value2 in value1.items():
          if value2['time'] < oldTime:
            oldKey1 = key1
            oldKey2 = key2
            oldTime = datetime.now()
      if table_entries > self.tableSize:
        self.enableLearn = True
        del self.pairs[oldKey1][oldKey2]
        if len(self.pairs[oldKey1].items()) == 0:
          del self.pairs[oldKey1]

    self.acKey0 = acKey1

  def predict(self, activeColumns):
    return_count = 0
    return_sdr = None

    acKey = '-'.join(map(str, activeColumns.sparse))
    sequences = self.pairs.get(acKey, {})
    for sequence_key, sequence_data in sequences.items():
      if sequence_data["count"] > return_count:
        return_count = sequence_data["count"]
        return_sdr = sequence_key

    if return_sdr is not None:
      tmp_sdr = SDR( self.dimensions )
      tmp_sdr.sparse = list(map(int, return_sdr.split('-')))
      return_sdr = tmp_sdr
    else:
      return_count = None

    return return_count, return_sdr

  # Control Unit
  def learn(self, activeColumns1, tm):
    pred_anomaly = None

    activeColumns0 = SDR( self.dimensions )
    activeColumns0.sparse = list(map(int, self.acKey0.split('-')))

    tm.activateDendrites(True)
    predictiveColumns = SDR( self.dimensions )
    predictiveColumns.sparse = list(set(sorted(list(np.where(tm.getPredictiveCells().dense == 1)[0]))))

    reflexiveCount, reflexiveColumns = self.predict(activeColumns0)
    if reflexiveColumns is None:
        reflexiveColumns = SDR( self.dimensions )
        
    pred_anomaly = 1 - np.count_nonzero((reflexiveColumns.dense & activeColumns1.dense)) / np.count_nonzero(activeColumns1.dense)
    self.anomaly.append( pred_anomaly )

    if self.enableLearn:

      # RM-1 SM-?
      if activeColumns1.flatten() == reflexiveColumns.flatten():
          pred_anomaly = 0

      # RM-0 SM-1
      elif activeColumns1.flatten() == predictiveColumns.flatten():
        if reflexiveCount is not None:
          key1 = self.acKey0
          key2 = '-'.join(map(str, reflexiveColumns.sparse))
          reflexiveCount = reflexiveCount - 1
          reflexiveCount = 1 if reflexiveCount < 1 else reflexiveCount
          self.pairs[key1][key2]["count"] = reflexiveCount

        key1 = self.acKey0
        if self.pairs.get(key1, None) is None:
          self.pairs[key1] = {}
        key2 = '-'.join(map(str, predictiveColumns.sparse))
        key2_data = self.pairs[key1].get(key2, {
          "count": 1,
          "time": datetime.now()
        })
        self.pairs[key1][key2] = key2_data

      # RM-0 SM-0
      else:
        if reflexiveCount is not None:
          key1 = self.acKey0
          key2 = '-'.join(map(str, reflexiveColumns.sparse))
          reflexiveCount = reflexiveCount - 1
          reflexiveCount = 1 if reflexiveCount < 1 else reflexiveCount
          self.pairs[key1][key2]["count"] = reflexiveCount


  def compute(self, activeColumns, tm):
    if self.acKey0 is not None:
      self.learn(activeColumns, tm)  
    self.add(activeColumns)
  
  def save_to_csv(self, dataset_name, time_with_rm):

    save_dir='./saved_reflex_data/'
    # Ensure the directory exists
    os.makedirs(save_dir, exist_ok=True)
    
    # Create a filename based on the dataset name, in the specified directory
    filename = os.path.join(save_dir, "Sabrina_4CAM_delay_times.xlsx")
  
    # filename = os.path.join(save_dir, f"{dataset_name}_reflex_memory.csv")
    
    # # Save self.pairs to a CSV file
    # with open(filename, 'w', newline='') as f:
    #   writer = csv.writer(f)
    #   # writer.writerow(['Key (1024 bits)', 'Values (1024 bits)'])
        
    #     # Write each key (as 1024 bits) and all corresponding values (also 1024 bits each)
    # print("The name of the dataset: ",dataset_name)  
    # print("No of keys: ", len(self.pairs.items()))
    valueCount = []
    frequencyCount = []
    for key1, value1 in self.pairs.items():
        valueCount.append(len(value1.items()))
        
        # print("\n"+hashlib.md5(key1.encode()).hexdigest(), len(value1.items()))
        for key2, value2 in value1.items():
            result = hashlib.md5(key1.encode())
            # print(result.hexdigest(), end=' ')
            result = hashlib.md5(key2.encode())
            # print(result.hexdigest(), end=' ')
            # print(value2["count"], end=' ')
            # print(value2["time"].timestamp())
            frequencyCount.append(value2["count"])

    # print("Values count per key: ",valueCount)
    # print("Highest Value count: ", max(valueCount))
    # print("Frequency count: ", frequencyCount)
    # print(f"Time taken with Reflexive Memory: {time_with_rm:.2f} seconds")
    # print("length",len(frequencyCount))
    

    
    # Data to be written into the Excel file
    data = {
        "Dataset Name": [dataset_name],
        "No of Keys": [len(self.pairs.items())],
        "Values Count per Key": [valueCount],
        "Highest Value Count": [max(valueCount)],
        "Frequency count": [frequencyCount],
        "Time Taken with Reflexive Memory (seconds)": [time_with_rm]
    }

    # Convert the new data into a pandas DataFrame
    new_df = pd.DataFrame(data)




    # Check if the Excel file already exists
    if os.path.exists(filename):
        # If the file exists, load the existing data
        existing_df = pd.read_excel(filename)
        # Append the new data to the existing DataFrame
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)
    else:
        # If the file does not exist, the new data is the only data
        updated_df = new_df

    # Write the updated DataFrame to the Excel file
    updated_df.to_excel(filename, index=False)

    print("Data written to", filename)
        

            

In [3]:
inputSources = [
   "monthly_sp500.csv",
   "weekly_dow_jones.csv",
   "weekly_nasdaq.csv",
   "weekly_sp500.csv",
   "monthly_vix_close.csv",
   "monthly_vix_high.csv",
   "monthly_vix_low.csv",
   "monthly_vix_open.csv",
   "daily_natural_gas.csv",
   "daily_oil_prices.csv",
   "value1_vix_close.csv",
   "value1_vix_high.csv",
   "value1_vix_low.csv",
   "value1_vix_open.csv"
]

In [None]:

config = {
    'enc': {
        "value" :
            {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
        "time": 
            {'timeOfDay': (30, 1), 'weekend': 21}
    },
    'sp': {
        'inputDimensions': None,
        'columnDimensions': (1638,),
        'potentialPct': 0.85,
        'potentialRadius': None,
        'globalInhibition': True,
        'localAreaDensity': 0.04395604395604396,
        'synPermInactiveDec': 0.006,
        'synPermActiveInc': 0.04,
        'synPermConnected': 0.13999999999999999,
        'boostStrength': 3.0,
        'wrapAround': True,
        'seed': 1,
        'learn': False,
    },
    'tm': {
        'cellsPerColumn': 13,
        'activationThreshold': 17,
        'initialPermanence': 0.21,
        'minThreshold': 10,
        'maxNewSynapseCount': 32,
        'permanenceIncrement': 0.1,
        'permanenceDecrement': 0.1,
        'predictedSegmentDecrement': 0.0,
        'maxSegmentsPerCell': 128,
        'maxSynapsesPerSegment': 64,
        'learn': True
    },
    'anomaly': {'period': 1000},
    'learnRows': 100,
    'reflexSize': 128
}


In [5]:
input_path = pathlib.Path('../datasets/numenta')

for dataset in inputSources:

    records = []
    with open(input_path.joinpath(dataset), "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)
        
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = config["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = config["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = config["enc"]["value"]["resolution"]
    scalarEncoder = RDSE( scalarEncoderParams )
    encodingWidth = (scalarEncoder.size)

    config['sp']['inputDimensions'] = (encodingWidth,)
    config['sp']['potentialRadius'] = encodingWidth

    sp = SpatialPooler(
        inputDimensions = config['sp']['inputDimensions'],
        columnDimensions = config['sp']['columnDimensions'],
        potentialPct = config['sp']['potentialPct'],
        potentialRadius = config['sp']['potentialRadius'],
        globalInhibition = config['sp']['globalInhibition'],
        localAreaDensity = config['sp']['localAreaDensity'],
        synPermInactiveDec = config['sp']['synPermInactiveDec'],
        synPermActiveInc = config['sp']['synPermActiveInc'],
        synPermConnected = config['sp']['synPermConnected'],
        boostStrength = config['sp']['boostStrength'],
        wrapAround = config['sp']['wrapAround'],
        seed = config['sp']['seed']
    )

    tm = TemporalMemory(
        columnDimensions = config['sp']['columnDimensions'],
        cellsPerColumn = config['tm']['cellsPerColumn'],
        activationThreshold = config['tm']['activationThreshold'],
        initialPermanence = config['tm']['initialPermanence'],
        connectedPermanence = config['sp']['synPermConnected'],
        minThreshold = config['tm']['minThreshold'],
        maxNewSynapseCount = config['tm']['maxNewSynapseCount'],
        permanenceIncrement = config['tm']['permanenceIncrement'],
        permanenceDecrement = config['tm']['permanenceDecrement'],
        predictedSegmentDecrement = config['tm']['predictedSegmentDecrement'],
        maxSegmentsPerCell = config['tm']['maxSegmentsPerCell'],
        maxSynapsesPerSegment = config['tm']['maxSynapsesPerSegment']
    )

    rm = ReflexiveMemory( sp.getColumnDimensions(), config['reflexSize'] )

    enc_info = Metrics( [encodingWidth], 999999999)
    sp_info = Metrics( sp.getColumnDimensions(), 999999999 )
    tm_info = Metrics( [tm.numberOfCells()], 999999999 )
    anomaly_history = AnomalyLikelihood(config["anomaly"]["period"])

    inputs = []
    anomaly = []
    anomalyProb = []
   
    print("\n"+dataset)
    try:

        # Start time for HTM with Reflexive Memory
        start_time_with_rm = time.time()
        tm_time = 0
        tm_time_1 = 0
        rm_time_1 = 0
        
        for count, record in enumerate(records):

            consumption = float(record[1])

            inputs.append( consumption )
            consumptionBits = scalarEncoder.encode(consumption)

            encoding = SDR( consumptionBits )
            enc_info.addData( encoding )
            
            activeColumns = SDR( sp.getColumnDimensions() )

            if count < config['learnRows']:

                sp.compute(encoding, True, activeColumns)
                sp_info.addData( activeColumns )

                timestamp = time.time()
                tm.compute(activeColumns, learn=True)
                tm_time_1 = tm_time_1 + (time.time() - timestamp)

                tm_info.addData( tm.getActiveCells().flatten() )

            else: 

                sp.compute(encoding, config['sp']['learn'], activeColumns)
                sp_info.addData( activeColumns )
              
                timestamp = time.time()
                rm.compute(activeColumns, tm)
                rm_time_1 = rm_time_1 + (time.time() - timestamp)

                timestamp = time.time()
                tm.compute(activeColumns, learn=config['tm']['learn'])
                tm_time = tm_time + (time.time() - timestamp)

                tm_info.addData( tm.getActiveCells().flatten() )

            anomaly.append( tm.anomaly )
            anomalyProb.append( anomaly_history.compute(tm.anomaly) )

        print("Data Points:",count)

        # End time for HTM with Reflexive Memory
        end_time_with_rm = time.time()

        # Time taken with Reflexive Memory
        time_with_rm = end_time_with_rm - start_time_with_rm
       
        # print(f"Time taken with Reflexive Memory: {time_with_rm:.2f} seconds")

        # save the reflex memory table in the external file per dataset
        # Data Set
        # rm.save_to_csv(dataset, time_with_rm)
        print(f"Time Comparison: {rm_time_1} and {tm_time + tm_time_1} seconds")
        
        # Filter out None values from rm.anomaly and anomaly
        filtered_rm_anomaly = [x for x in rm.anomaly if x is not None]

        # Calculate averages
        average_rm_anomaly = sum(rm.anomaly) / len(rm.anomaly)
        average_tm_anomaly = sum(anomaly) / len(anomaly)

        # Print the results
        print("The anomaly scores are:", average_rm_anomaly, "and", average_tm_anomaly)
        print("Anomaly score samples:", len(rm.anomaly), "and", len(anomaly))

    except Exception as e:
        print(traceback.format_exc())
        print(e)




monthly_sp500.csv
Data Points: 1765
Time Comparison: 0.3349621295928955 and 0.28084635734558105 seconds
The anomaly scores are: 0.4608191524858192 and 0.31068327678799934
Anomaly score samples: 1665 and 1766

weekly_dow_jones.csv
Data Points: 2079
Time Comparison: 1.1966047286987305 and 0.6025938987731934 seconds
The anomaly scores are: 0.9886516759306057 and 0.9404447105887812
Anomaly score samples: 1979 and 2080

weekly_nasdaq.csv
Data Points: 2080
Time Comparison: 1.0501396656036377 and 0.665989875793457 seconds
The anomaly scores are: 0.9226010101010111 and 0.778625393219217
Anomaly score samples: 1980 and 2081

weekly_sp500.csv
Data Points: 2081
Time Comparison: 0.936962366104126 and 0.6273539066314697 seconds
The anomaly scores are: 0.8627096303774751 and 0.6528644989437883
Anomaly score samples: 1981 and 2082

monthly_vix_close.csv
Data Points: 4050
Time Comparison: 1.3510453701019287 and 1.45729660987854 seconds
The anomaly scores are: 0.3870112517580893 and 0.0976370718469004