In [1]:
from htm.bindings.sdr import SDR, Metrics
from htm.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
import numpy as np
import pandas as pd
import pathlib
import datetime
import csv
from datetime import datetime
import os
from htm.encoders.rdse import RDSE, RDSE_Parameters
import time
import traceback
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import pairwise_distances
from sklearn_extra.cluster import KMedoids
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from random import shuffle


In [2]:
class ReflexiveMemory:
  """Bounded first-order lookup table of consecutive-input transitions.

  Every input SDR is turned into a string key (its sparse indices joined
  with '-').  ``pairs[previous_key][next_key]`` holds a dict with the number
  of times ``next_key`` directly followed ``previous_key`` (``"count"``) and
  the wall-clock time of the latest occurrence (``"time"``).  Once the table
  holds more than ``reflexSize`` pairs, the pair with the oldest ``"time"``
  is evicted.

  Besides the table the object accumulates, per scored transition:
    * ``anomalyRM`` / ``anomalyTM`` - fraction of the actually active columns
      missed by the reflexive / temporal-memory prediction,
    * ``historyRM`` / ``historyTM`` / ``historyGT`` - the dense column vectors
      of the reflexive prediction, the TM prediction and the ground truth,
    * ``anomalyNU`` - filled by the caller, never written by this class.
  """

  def __init__(self, reflexSize):
    """Create an empty table limited to ``reflexSize`` stored pairs."""
    self.acKey0 = None          # key of the previously seen input (None before the first add)
    self.pairs = {}             # previous_key -> {next_key: {"count": int, "time": datetime}}
    self.anomalyRM = []
    self.anomalyTM = []
    self.anomalyNU = []
    self.enableLearn = False    # flipped to True the first time the table overflows
    self.tableSize  = reflexSize
    self.historyRM = []
    self.historyTM = []
    self.historyGT = []

  @staticmethod
  def _sdr_key(sdr):
    """Serialise the sparse indices of ``sdr`` into a '-'-joined string key."""
    return '-'.join(map(str, sdr.sparse))

  @staticmethod
  def _key_indices(key):
    """Inverse of ``_sdr_key``; an empty key (SDR without active bits) maps to []."""
    return [int(index) for index in key.split('-')] if key else []

  @staticmethod
  def _mismatch(predicted, actual):
    """Return 1 - |predicted & actual| / |actual| for two dense bit vectors.

    When ``actual`` has no active bit the ratio is undefined; the transition
    is then scored as fully anomalous (1) instead of raising
    ZeroDivisionError.
    """
    active = np.count_nonzero(actual)
    if active == 0:
      return 1
    return 1 - np.count_nonzero(predicted & actual) / active

  def add(self, denseColumns):
    """Record the transition from the previous input to ``denseColumns``.

    The first call only remembers the input.  Later calls increment the
    counter of the (previous, current) pair, refresh its timestamp and, if
    the table now exceeds ``tableSize`` pairs, evict the pair with the
    oldest timestamp.
    """
    acKey1 = self._sdr_key(denseColumns)
    if self.acKey0 is not None:
      now = datetime.now()
      followers = self.pairs.setdefault(self.acKey0, {})
      sequence_data = followers.setdefault(acKey1, {"count": 0, "time": now})
      sequence_data["count"] += 1
      sequence_data["time"] = now

      # Single pass: count the stored pairs and remember the oldest one.
      # Starting from "no candidate" (rather than from the current time with
      # a strict comparison) guarantees a victim exists whenever the table is
      # non-empty, even if several timestamps are identical.
      table_entries = 0
      oldest = None  # (time, previous_key, next_key)
      for key1, value1 in self.pairs.items():
        table_entries += len(value1)
        for key2, value2 in value1.items():
          if oldest is None or value2["time"] < oldest[0]:
            oldest = (value2["time"], key1, key2)

      if table_entries > self.tableSize:
        self.enableLearn = True
        _, oldKey1, oldKey2 = oldest
        del self.pairs[oldKey1][oldKey2]
        if not self.pairs[oldKey1]:
          del self.pairs[oldKey1]

    self.acKey0 = acKey1

  def predict(self, denseColumns):
    """Return the most frequent successor stored for ``denseColumns``.

    Returns:
      ``(count, sdr)`` where ``sdr`` is a new SDR with the dimensions of
      ``denseColumns`` holding the successor's bits, or ``(None, None)``
      when the input has no stored successor.  On equal counts the successor
      stored first wins.
    """
    return_count = 0
    best_key = None

    sequences = self.pairs.get(self._sdr_key(denseColumns), {})
    for sequence_key, sequence_data in sequences.items():
      if sequence_data["count"] > return_count:
        return_count = sequence_data["count"]
        best_key = sequence_key

    if best_key is None:
      return None, None

    return_sdr = SDR( denseColumns.dimensions )
    return_sdr.sparse = self._key_indices(best_key)
    return return_count, return_sdr

  def learn(self, denseColumns1, sp, tm):
    """Score the reflexive and the TM prediction against the new input.

    Must only be called after at least one ``add`` (``acKey0`` is decoded
    here).  Appends one entry to ``historyRM``/``historyTM``/``historyGT``
    and to ``anomalyRM``/``anomalyTM``.  All spatial-pooler calls are made
    with learning disabled.
    """
    # Rebuild the previous input from its key.
    denseColumns0 = SDR( denseColumns1.dimensions )
    denseColumns0.sparse = self._key_indices(self.acKey0)

    tm.activateDendrites(True)
    predictiveCells = tm.getPredictiveCells()
    dimensions_sparse = predictiveCells.dimensions[0]

    # Collapse predictive cells to their first-axis indices.  np.unique
    # yields sorted, duplicate-free values; the original
    # list(set(sorted(...))) discarded the sort it had just performed.
    predictiveColumns = SDR( dimensions_sparse )
    predictiveColumns.sparse = np.unique(np.where(predictiveCells.dense == 1)[0]).tolist()

    # Reflexive prediction: most frequent successor of the previous input,
    # mapped to columns.  Stays all-zero when nothing is stored.
    reflexiveColumns = SDR( dimensions_sparse )
    reflexiveCount, denseReflexiveColumns = self.predict(denseColumns0)
    if denseReflexiveColumns is not None:
      sp.compute(denseReflexiveColumns, False, reflexiveColumns)

    # NOTE(review): activeColumns0 is never read.  The call is kept because
    # dropping it might alter the spatial pooler's internal state and hence
    # later results - confirm before removing.
    activeColumns0 = SDR( dimensions_sparse )
    sp.compute(denseColumns0, False, activeColumns0)

    activeColumns1 = SDR( dimensions_sparse )
    sp.compute(denseColumns1, False, activeColumns1)

    self.historyRM.append( reflexiveColumns.dense )
    self.historyTM.append( predictiveColumns.dense )
    self.historyGT.append( activeColumns1.dense )

    overlapRM = self._mismatch(reflexiveColumns.dense, activeColumns1.dense)
    self.anomalyRM.append( overlapRM )

    overlapTM = self._mismatch(predictiveColumns.dense, activeColumns1.dense)
    self.anomalyTM.append( overlapTM )

# Disabled experiment (would need a None check on denseReflexiveColumns):
#    if overlapRM < (1 - 0.5):
#      oldKey1 = '-'.join(map(str, denseColumns0.sparse))
#      oldKey2 = '-'.join(map(str, denseReflexiveColumns.sparse))
#      if reflexiveCount > 1:
#        self.pairs[oldKey1][oldKey2]["count"] = reflexiveCount - 1
#      else:
#        del self.pairs[oldKey1][oldKey2]
#        if len(self.pairs[oldKey1].items()) == 0:
#          del self.pairs[oldKey1]

  def compute(self, denseColumns, sp, tm):
    """Score the transition into ``denseColumns`` (if any), then store it."""
    if self.acKey0 is not None:
      self.learn(denseColumns, sp, tm)
    self.add(denseColumns)
  

In [3]:
# CSV files (resolved against the dataset directory by the experiment loop),
# processed in exactly this order.
inputSources = [
    # equity indices
    "monthly_sp500_pca.csv",
    "weekly_dow_jones.csv",
    "weekly_nasdaq.csv",
    "weekly_sp500.csv",
    # VIX, "monthly_" files
    "monthly_vix_close.csv",
    "monthly_vix_high.csv",
    "monthly_vix_low.csv",
    "monthly_vix_open.csv",
    # energy
    "daily_natural_gas.csv",
    "daily_oil_prices.csv",
    # VIX, "value1_" files
    "value1_vix_close.csv",
    "value1_vix_high.csv",
    "value1_vix_low.csv",
    "value1_vix_open.csv",
    # gold
    "monthly_gold_prices.csv",
]

In [4]:

# Experiment configuration shared by every dataset.
config = {
    # Encoder settings; the experiment loop reads only the "value" entry.
    'enc': {
        "value": {
            'resolution': 0.88,
            'size': 700,
            'sparsity': 0.02,
        },
        "time": {
            'timeOfDay': (30, 1),
            'weekend': 21,
        },
    },
    # Spatial pooler. The two None entries are filled in by the experiment
    # loop once the encoder width is known.
    'sp': {
        'inputDimensions': None,
        'columnDimensions': (1638,),
        'potentialPct': 0.85,
        'potentialRadius': None,
        'globalInhibition': True,
        'localAreaDensity': 0.04395604395604396,
        'synPermInactiveDec': 0.006,
        'synPermActiveInc': 0.04,
        'synPermConnected': 0.13999999999999999,
        'boostStrength': 3.0,
        'wrapAround': True,
        'seed': 1,
        'learn': False,   # learning flag used after the first `learnRows` records
    },
    # Temporal memory.
    'tm': {
        'cellsPerColumn': 13,
        'activationThreshold': 17,
        'initialPermanence': 0.21,
        'minThreshold': 10,
        'maxNewSynapseCount': 32,
        'permanenceIncrement': 0.1,
        'permanenceDecrement': 0.1,
        'predictedSegmentDecrement': 0.0,
        'maxSegmentsPerCell': 128,
        'maxSynapsesPerSegment': 64,
        'learn': True,    # learning flag used after the first `learnRows` records
    },
    # Argument passed to AnomalyLikelihood.
    'anomaly': {
        'period': 1000,
    },
    # Number of leading records for which SP and TM learning is forced on.
    'learnRows': 100,
    # Maximum number of pairs kept by ReflexiveMemory.
    'reflexSize': 256,
}


In [5]:
def anomalyScore(y, x):
  """Fraction of the active bits of ``y`` that are not active in ``x``.

  ``y`` and ``x`` are combined with ``&``, so both must support bitwise AND
  (e.g. integer or boolean arrays).  Returns the integer 1 when ``y`` has no
  active bit at all.
  """
  active_in_y = np.count_nonzero(y)
  if active_in_y == 0:
    return 1
  shared = np.count_nonzero((x & y))
  return 1 - shared / active_in_y

def match(y, x, idx1):
  """Return the index of the entry in ``y`` that prediction ``x[idx1]`` is assigned to.

  If ``x[idx1]`` misses at most half of the active bits of ``y[idx1]`` the
  prediction counts as a hit and ``idx1`` is returned.  Otherwise every
  entry of ``y`` is scored against ``x[idx1]`` and the index with the lowest
  anomaly score is returned (the first one on ties).
  """
  overlap_min = 0.5
  prediction = x[idx1]

  own_score = anomalyScore(y[idx1], prediction)
  if not (own_score > (1 - overlap_min)):
    return idx1

  idx_closest, score_closest = None, None
  for idx2, candidate in enumerate(y):
    candidate_score = anomalyScore(candidate, prediction)
    if score_closest is None or candidate_score < score_closest:
      idx_closest, score_closest = idx2, candidate_score
  return idx_closest

def roc_auc_score_multiclass(y_true, y_pred):
  """Unweighted mean of one-vs-rest ROC AUC over the classes found in ``y_true``.

  For each distinct label both sequences are binarised ("equals this label"
  or not) and passed to ``roc_auc_score``; the per-class scores are then
  averaged.
  """
  def one_vs_rest(labels, target):
    return [bool(label == target) for label in labels]

  scores = [
    roc_auc_score(one_vs_rest(y_true, y_class), one_vs_rest(y_pred, y_class))
    for y_class in set(y_true)
  ]
  return sum(scores) / len(scores)


In [6]:
input_path = pathlib.Path('../datasets/numenta')
dataset_metrics = []
# shuffle(inputSources)


def _matching_metrics(ground_truth, predicted):
    """Classification metrics for matching each prediction to a ground-truth step.

    Every time step is its own class (label = index).  ``match`` assigns each
    prediction to a ground-truth index, and the assignments are compared with
    the identity labelling.  Keys are returned in the order in which the
    notebook stores them in the per-dataset metric row.
    """
    n_samples = len(ground_truth)
    Y_labels = list(range(n_samples))
    X_labels = [match(ground_truth, predicted, idx) for idx in range(n_samples)]
    precision, recall, fscore, support = precision_recall_fscore_support(
        Y_labels, X_labels, average='macro', zero_division=0.0)
    return {
        'accuracy0': accuracy_score(Y_labels, X_labels),
        'precision0': precision,
        'recall0': recall,
        'fscore0': fscore,
        'support0': support,
        'auc0': roc_auc_score_multiclass(Y_labels, X_labels),
    }


def _anomaly_summary(scores, period):
    """Return (mean score, number of scores, fraction of non-zero likelihoods).

    Each score is fed, in order, to a fresh ``AnomalyLikelihood(period)``.
    NOTE(review): the last value counts likelihoods that are merely non-zero,
    not likelihoods above a threshold - confirm this is the intended
    "anomalous instances" measure.
    """
    likelihood = AnomalyLikelihood(period)
    probabilities = [likelihood.compute(score) for score in scores]
    return (
        sum(scores) / len(scores),
        len(scores),
        np.count_nonzero(probabilities) / len(probabilities),
    )


# Wrapping the iterable lets tqdm advance and close the bar on its own, also
# when an iteration is skipped or the loop is interrupted.
for dataset in tqdm(inputSources):

    tm_infer_tm = 0
    tm_infer_rm = 0

    # newline='' is what the csv module expects from files it reads.
    with open(input_path.joinpath(dataset), "r", newline="") as fin:
        reader = csv.reader(fin)
        # The first three rows are discarded (presumably a multi-line
        # header - confirm against the dataset files).
        for _ in range(3):
            next(reader)
        records = list(reader)

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = config["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = config["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = config["enc"]["value"]["resolution"]
    scalarEncoder = RDSE( scalarEncoderParams )
    encodingWidth = (scalarEncoder.size)

    config['sp']['inputDimensions'] = (encodingWidth,)
    config['sp']['potentialRadius'] = encodingWidth

    sp = SpatialPooler(
        inputDimensions = config['sp']['inputDimensions'],
        columnDimensions = config['sp']['columnDimensions'],
        potentialPct = config['sp']['potentialPct'],
        potentialRadius = config['sp']['potentialRadius'],
        globalInhibition = config['sp']['globalInhibition'],
        localAreaDensity = config['sp']['localAreaDensity'],
        synPermInactiveDec = config['sp']['synPermInactiveDec'],
        synPermActiveInc = config['sp']['synPermActiveInc'],
        synPermConnected = config['sp']['synPermConnected'],
        boostStrength = config['sp']['boostStrength'],
        wrapAround = config['sp']['wrapAround'],
        seed = config['sp']['seed']
    )

    tm = TemporalMemory(
        columnDimensions = config['sp']['columnDimensions'],
        cellsPerColumn = config['tm']['cellsPerColumn'],
        activationThreshold = config['tm']['activationThreshold'],
        initialPermanence = config['tm']['initialPermanence'],
        # The TM reuses the spatial pooler's connected-permanence threshold.
        connectedPermanence = config['sp']['synPermConnected'],
        minThreshold = config['tm']['minThreshold'],
        maxNewSynapseCount = config['tm']['maxNewSynapseCount'],
        permanenceIncrement = config['tm']['permanenceIncrement'],
        permanenceDecrement = config['tm']['permanenceDecrement'],
        predictedSegmentDecrement = config['tm']['predictedSegmentDecrement'],
        maxSegmentsPerCell = config['tm']['maxSegmentsPerCell'],
        maxSynapsesPerSegment = config['tm']['maxSynapsesPerSegment']
    )

    rm = ReflexiveMemory( config['reflexSize'] )

    # enc_info = Metrics( [encodingWidth], 999999999)
    # sp_info = Metrics( sp.getColumnDimensions(), 999999999 )
    # tm_info = Metrics( [tm.numberOfCells()], 999999999 )

    try:

        for count, record in enumerate(records):

            # The value to encode is read from the second CSV column.
            consumption = float(record[1])
            consumptionBits = scalarEncoder.encode(consumption)

            encoding = SDR( consumptionBits )
            # enc_info.addData( encoding )

            activeColumns = SDR( sp.getColumnDimensions() )

            # Learning is forced on for the first `learnRows` records and
            # falls back to the configured flags afterwards.
            learn_sp = config['sp']['learn']
            learn_tm = config['tm']['learn']
            if count < config['learnRows']:
                learn_sp = True
                learn_tm = True

            # Score the predictions for this record and store the transition
            # before SP/TM see the record themselves.
            rm.compute(encoding, sp, tm)

            sp.compute(encoding, learn_sp, activeColumns)
            # sp_info.addData( activeColumns )

            # perf_counter is the clock meant for measuring short durations.
            started = time.perf_counter()
            tm.compute(activeColumns, learn=learn_tm)
            # tm_info.addData( tm.getActiveCells().flatten() )
            tm_infer_tm = tm_infer_tm + (time.perf_counter() - started)

            started = time.perf_counter()
            rm.predict(encoding)
            tm_infer_rm = tm_infer_rm + (time.perf_counter() - started)

            rm.anomalyNU.append(tm.anomaly)

    except Exception:
        # Best effort: report the failure and evaluate what was processed.
        print(f"{dataset}: processing stopped early")
        print(traceback.format_exc())

    metric = {}
    metric['dataset'] = dataset

    n_samples = len(rm.historyGT)
    if n_samples == 0:
        # Fewer than two records were processed, so no transition was scored
        # and every average below would divide by zero.
        print(f"{dataset}: no scored transitions, metrics left empty")
        dataset_metrics.append(metric)
        continue

    # --- Reflexive memory -------------------------------------------------
    # NOTE(review): the timers cover every record, while the divisor is the
    # number of scored transitions (one less when all records succeed).
    matching = _matching_metrics(rm.historyGT, rm.historyRM)
    metric['total-infe-time-rm'] = tm_infer_rm
    metric['infer-time-rm'] = tm_infer_rm / len(rm.anomalyRM)
    # Set here (and again, with the same values, by the summary loop below)
    # so the columns keep their position in the resulting table.
    metric['anomaly-avg-rm'] = sum(rm.anomalyRM) / len(rm.anomalyRM)
    metric['anomaly-samples-rm'] = len(rm.anomalyRM)
    for name, value in matching.items():
        metric[f'{name}-rm'] = value

#    X = pairwise_distances(rm.historyGT, metric=anomalyScore)
#    kmedoids = KMedoids(n_clusters=12, random_state=0).fit(X)
#    X_labels = kmedoids.labels_
#    Y = pairwise_distances(rm.historyRM, rm.historyGT, metric=anomalyScore)
#    Y_labels = kmedoids.predict(Y)
#    precision, recall, fscore, support = precision_recall_fscore_support(Y_labels, X_labels, average='macro', zero_division=0.0)
#    metric['accuracy1-rm'] = accuracy_score(Y_labels, X_labels)
#    metric['precision1-rm'] = precision
#    metric['recall1-rm'] = recall
#    metric['fscore1-rm'] = fscore
#    metric['support1-rm'] = support
#    metric['auc1-rm'] = roc_auc_score_multiclass(Y_labels, X_labels)

    # --- Temporal memory --------------------------------------------------
    matching = _matching_metrics(rm.historyGT, rm.historyTM)
    metric['total-infer-time-tm'] = tm_infer_tm
    metric['infer-time-tm'] = tm_infer_tm / len(rm.anomalyTM)
    for name, value in matching.items():
        metric[f'{name}-tm'] = value

#    X = pairwise_distances(rm.historyGT, metric=anomalyScore)
#    kmedoids = KMedoids(n_clusters=12, random_state=0).fit(X)
#    X_labels = kmedoids.labels_
#    Y = pairwise_distances(rm.historyTM, rm.historyGT, metric=anomalyScore)
#    Y_labels = kmedoids.predict(Y)
#    precision, recall, fscore, support = precision_recall_fscore_support(Y_labels, X_labels, average='macro', zero_division=0.0)
#    metric['accuracy1-tm'] = accuracy_score(Y_labels, X_labels)
#    metric['precision1-tm'] = precision
#    metric['recall1-tm'] = recall
#    metric['fscore1-tm'] = fscore
#    metric['support1-tm'] = support
#    metric['auc1-tm'] = roc_auc_score_multiclass(Y_labels, X_labels)

    # --- Anomaly summaries (RM, TM, and the TM's own anomaly value) --------
    anomaly_series = (
        ('rm', rm.anomalyRM),
        ('tm', rm.anomalyTM),
        ('nupic', rm.anomalyNU),
    )
    for suffix, scores in anomaly_series:
        average, samples, nonzero_fraction = _anomaly_summary(
            scores, config["anomaly"]["period"])
        metric[f'anomaly-avg-{suffix}'] = average
        metric[f'anomaly-samples-{suffix}'] = samples
        metric[f'anomaly-prob-avg-{suffix}'] = nonzero_fraction

    dataset_metrics.append(metric)
    # break

100%|██████████| 15/15 [04:19<00:00, 17.28s/it]


In [7]:
# One row per dataset, one column per metric key collected by the experiment loop.
df = pd.DataFrame(dataset_metrics)
# Persist the table next to the notebook (row index omitted).
df.to_csv('metrics.csv', index=False)

In [8]:
# Matching accuracy and mean anomaly score: reflexive memory (rm) vs. temporal memory (tm / nupic).
df[['dataset','accuracy0-rm','accuracy0-tm','anomaly-avg-rm','anomaly-avg-tm','anomaly-avg-nupic']]

Unnamed: 0,dataset,accuracy0-rm,accuracy0-tm,anomaly-avg-rm,anomaly-avg-tm,anomaly-avg-nupic
0,monthly_sp500_pca.csv,0.143815,0.414381,0.835466,0.58214,0.582394
1,weekly_dow_jones.csv,0.009139,0.037037,0.98581,0.934758,0.934789
2,weekly_nasdaq.csv,0.077885,0.211058,0.909809,0.761385,0.7615
3,weekly_sp500.csv,0.13359,0.352235,0.853075,0.622744,0.622925
4,monthly_vix_close.csv,0.70963,0.971111,0.327219,0.04644,0.046676
5,monthly_vix_high.csv,0.666914,0.950617,0.351067,0.06763,0.06786
6,monthly_vix_low.csv,0.804938,0.978519,0.295288,0.036986,0.037223
7,monthly_vix_open.csv,0.666173,0.955556,0.358333,0.064002,0.064233
8,daily_natural_gas.csv,0.967058,0.994653,0.067717,0.010514,0.010684
9,daily_oil_prices.csv,0.876867,0.991325,0.242602,0.022945,0.023063


In [13]:
# Mean anomaly score next to the fraction of non-zero anomaly likelihoods, per source.
df[['dataset','anomaly-avg-rm','anomaly-prob-avg-rm','anomaly-avg-tm','anomaly-prob-avg-tm','anomaly-avg-nupic','anomaly-prob-avg-nupic']]

Unnamed: 0,dataset,anomaly-avg-rm,anomaly-prob-avg-rm,anomaly-avg-tm,anomaly-prob-avg-tm,anomaly-avg-nupic,anomaly-prob-avg-nupic
0,monthly_sp500_pca.csv,0.835466,0.391225,0.58214,0.391225,0.582394,0.391596
1,weekly_dow_jones.csv,0.98581,0.519481,0.934758,0.519481,0.934789,0.519712
2,weekly_nasdaq.csv,0.909809,0.519712,0.761385,0.519712,0.7615,0.519942
3,weekly_sp500.csv,0.853075,0.519942,0.622744,0.519942,0.622925,0.520173
4,monthly_vix_close.csv,0.327219,0.753333,0.04644,0.753333,0.046676,0.753394
5,monthly_vix_high.csv,0.351067,0.753333,0.06763,0.753333,0.06786,0.753394
6,monthly_vix_low.csv,0.295288,0.753333,0.036986,0.753333,0.037223,0.753394
7,monthly_vix_open.csv,0.358333,0.753333,0.064002,0.753333,0.064233,0.753394
8,daily_natural_gas.csv,0.067717,0.827699,0.010514,0.827699,0.010684,0.827729
9,daily_oil_prices.csv,0.242602,0.879639,0.022945,0.879639,0.023063,0.879653


In [9]:
# Timing: 'infer-time-*' is the accumulated time divided by the number of scored
# transitions, 'total-*' is the accumulated time for the whole dataset.
df[['dataset','infer-time-rm','infer-time-tm','total-infer-time-tm','total-infe-time-rm']]

Unnamed: 0,dataset,infer-time-rm,infer-time-tm,total-infer-time-tm,total-infe-time-rm
0,monthly_sp500_pca.csv,1e-05,0.000325,0.53286,0.016428
1,weekly_dow_jones.csv,8e-06,0.000302,0.628087,0.016986
2,weekly_nasdaq.csv,9e-06,0.000355,0.73832,0.019421
3,weekly_sp500.csv,9e-06,0.000318,0.661935,0.019588
4,monthly_vix_close.csv,1.1e-05,0.000236,0.957768,0.043591
5,monthly_vix_high.csv,1.1e-05,0.000316,1.279275,0.046569
6,monthly_vix_low.csv,1e-05,0.000168,0.680929,0.039525
7,monthly_vix_open.csv,1.1e-05,0.000305,1.23596,0.044573
8,daily_natural_gas.csv,8e-06,5e-05,0.292075,0.044396
9,daily_oil_prices.csv,9e-06,0.000115,0.950603,0.071491


| Variable | Source | Description | 
| :---------------- | :------ | :------ | 
| dataset | | Dataset Name | 
| precision0-rm | RM | Precision (1-to-1 Matching) | 
| recall0-rm | RM | Recall (1-to-1 Matching) | 
| fscore0-rm | RM | F-Score (1-to-1 Matching) | 
| support0-rm | RM | Support (1-to-1 Matching) | 
| accuracy0-rm | RM | Accuracy (1-to-1 Matching) | 
| anomaly-avg-rm | RM | Average Anomaly (Ilia Method) | 
| anomaly-samples-rm | RM | Samples | 
| total-infe-time-rm | RM | Total Inference Time (all samples) | 
| infer-time-rm | RM | Inference Time per input | 
| precision0-tm | TM | Precision (1-to-1 Matching) |
| recall0-tm | TM | Recall (1-to-1 Matching) |
| fscore0-tm | TM | F-Score (1-to-1 Matching) |
| support0-tm | TM | Support (1-to-1 Matching) |
| accuracy0-tm | TM | Accuracy (1-to-1 Matching) |
| anomaly-avg-tm | TM | Average Anomaly (Ilia Method) | 
| anomaly-samples-tm | TM | Samples | 
| total-infer-time-tm | TM | Total Inference Time (all samples) | 
| infer-time-tm | TM | Inference Time per input | 
| anomaly-avg-nupic | TM | Average Anomaly (Numenta Method) |
| anomaly-prob-avg-nupic | TM | Fraction of samples with a non-zero anomaly likelihood (Numenta Method) |


In [None]:
# Reflexive-memory report: timing, anomaly and matching columns, in display order.
features = (
    ['dataset']
    # timing
    + ['total-infe-time-rm', 'infer-time-rm']
    # anomaly
    + ['anomaly-avg-rm', 'anomaly-samples-rm']
    # matching metrics
    + ['accuracy0-rm', 'precision0-rm', 'recall0-rm', 'fscore0-rm', 'support0-rm', 'auc0-rm']
#    + ['accuracy1-rm', 'precision1-rm', 'recall1-rm', 'fscore1-rm', 'support1-rm', 'auc1-rm']
)
df[features]

Unnamed: 0,dataset,total-infe-time-rm,infer-time-rm,anomaly-avg-rm,anomaly-samples-rm,accuracy0-rm,precision0-rm,recall0-rm,fscore0-rm,support0-rm,auc0-rm
0,monthly_sp500_pca.csv,0.016428,1e-05,0.835466,1641,0.143815,0.133148,0.143815,0.135431,,0.571646
1,weekly_dow_jones.csv,0.016986,8e-06,0.98581,2079,0.009139,0.007776,0.009139,0.008017,,0.504331
2,weekly_nasdaq.csv,0.019421,9e-06,0.909809,2080,0.077885,0.073147,0.077885,0.074199,,0.538721
3,weekly_sp500.csv,0.019588,9e-06,0.853075,2081,0.13359,0.12832,0.13359,0.12965,,0.566587
4,monthly_vix_close.csv,0.043591,1.1e-05,0.327219,4050,0.70963,0.705368,0.70963,0.705963,,0.854779
5,monthly_vix_high.csv,0.046569,1.1e-05,0.351067,4050,0.666914,0.663575,0.666914,0.664051,,0.833416
6,monthly_vix_low.csv,0.039525,1e-05,0.295288,4050,0.804938,0.800873,0.804938,0.801549,,0.902445
7,monthly_vix_open.csv,0.044573,1.1e-05,0.358333,4050,0.666173,0.661649,0.666173,0.662329,,0.833045
8,daily_natural_gas.csv,0.044396,8e-06,0.067717,5798,0.967058,0.965831,0.967058,0.966057,,0.983526
9,daily_oil_prices.csv,0.071491,9e-06,0.242602,8300,0.876867,0.872386,0.876867,0.873202,,0.938426


In [11]:
# Temporal-memory report: timing, anomaly and matching columns, in display order.
features = (
    ['dataset']
    # timing
    + ['total-infer-time-tm', 'infer-time-tm']
    # anomaly
    + ['anomaly-avg-nupic', 'anomaly-avg-tm', 'anomaly-prob-avg-nupic', 'anomaly-samples-tm']
    # matching metrics
    + ['accuracy0-tm', 'precision0-tm', 'recall0-tm', 'fscore0-tm', 'support0-tm', 'auc0-tm']
#    + ['accuracy1-tm', 'precision1-tm', 'recall1-tm', 'fscore1-tm', 'support1-tm', 'auc1-tm']
)
df[features]

Unnamed: 0,dataset,total-infer-time-tm,infer-time-tm,anomaly-avg-nupic,anomaly-avg-tm,anomaly-prob-avg-nupic,anomaly-samples-tm,accuracy0-tm,precision0-tm,recall0-tm,fscore0-tm,support0-tm,auc0-tm
0,monthly_sp500_pca.csv,0.53286,0.000325,0.582394,0.58214,0.391596,1641,0.414381,0.39401,0.414381,0.399842,,0.707012
1,weekly_dow_jones.csv,0.628087,0.000302,0.934789,0.934758,0.519712,2079,0.037037,0.032548,0.037037,0.033831,,0.518287
2,weekly_nasdaq.csv,0.73832,0.000355,0.7615,0.761385,0.519942,2080,0.211058,0.197958,0.211058,0.201701,,0.605339
3,weekly_sp500.csv,0.661935,0.000318,0.622925,0.622744,0.520173,2081,0.352235,0.332475,0.352235,0.338247,,0.675962
4,monthly_vix_close.csv,0.957768,0.000236,0.046676,0.04644,0.753394,4050,0.971111,0.967029,0.971111,0.968138,,0.985552
5,monthly_vix_high.csv,1.279275,0.000316,0.06786,0.06763,0.753394,4050,0.950617,0.946884,0.950617,0.94788,,0.975303
6,monthly_vix_low.csv,0.680929,0.000168,0.037223,0.036986,0.753394,4050,0.978519,0.974531,0.978519,0.97574,,0.989257
7,monthly_vix_open.csv,1.23596,0.000305,0.064233,0.064002,0.753394,4050,0.955556,0.950607,0.955556,0.952035,,0.977772
8,daily_natural_gas.csv,0.292075,5e-05,0.010684,0.010514,0.827729,5798,0.994653,0.993966,0.994653,0.994121,,0.997326
9,daily_oil_prices.csv,0.950603,0.000115,0.023063,0.022945,0.879653,8300,0.991325,0.989705,0.991325,0.990166,,0.995662


In [12]:
#import hashlib
#for key1, value1 in rm.pairs.items():
#    for key2, value2 in value1.items():
#        result = hashlib.md5(key1.encode())
#        print(result.hexdigest(), end=' ')
#        result = hashlib.md5(key2.encode())
#        print(result.hexdigest(), end=' ')
#        print(value2["count"], end=' ')
#        print(value2["time"].timestamp())
