Related Post: [Anomaly detection with HTM.core model on sine](https://discourse.numenta.org/t/anomaly-detection-with-htm-core-model-on-sine/8975/4)



In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from htm.bindings.sdr import SDR, Metrics
from htm.encoders.scalar_encoder import ScalarEncoder, ScalarEncoderParameters
from htm.bindings.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.encoders.date import DateEncoder
from htm.algorithms.anomaly_likelihood import \
    AnomalyLikelihood  # FIXME use TM.anomaly instead, but it gives worse results than the py.AnomalyLikelihood now
from htm.bindings.algorithms import Predictor

In [1]:
x = np.linspace(0, np.pi*60, 8640)
y = np.sin(x) + 1

df = pd.DataFrame(data=zip(x, y, date), index=range(len(x)), columns=['x','y', 'date'])

# making label
df['label'] = 0

df.iloc[3700:3800,3] = 1
df.iloc[4100:4150,3] = 1
df.iloc[6000:6100,3] = 1
df.iloc[6800:7000,3] = 1

# making the anomaly
df.iloc[3700:3800,1] = 0.7
df.iloc[4100:4150,1] = 0.5
df.iloc[6000:6100,1] = 0.0
df.iloc[6800:7000,1] = np.random.random_sample([200,1])

print(df.shape)

parameters = {
    'predictor': {'sdrc_alpha': 0.05},
    'sp': { 
            'columnDimensions': (2048, ),
            'potentialRadius' : 2048,
            'globalInhibition' : True
          },
    'tm': {
        'columnDimensions': (2048, ), 
        'activationThreshold': 17,
        'cellsPerColumn': 13,
        'initialPerm': 0.21,
        'maxSegmentsPerCell': 128,
        'maxSynapsesPerSegment': 64,
        'minThreshold': 10,
        'connectedPermanence': 0.13999999999999999,
        'newSynapseCount': 32,
        'permanenceDec': 0.1,
        'permanenceInc': 0.1},
    'anomaly': {
        'likelihood':
            { 
            'learningPeriod':400, # if None it will be calculated later, else value (indication is 500 for 5-min interval)
            'historicWindowSize':4000,  # default of 8640 is a month's worth of history at 5-minute intervals
            'probationaryPct': 0.1,
            'reestimationPeriod': 100,
            'estimationSamples':100}  # how often we re-estimate the Gaussian distribution
    }
}

#when changing the encoders
par = ScalarEncoderParameters()
par.activeBits = 24
par.minimum = -1
par.maximum = 3
par.size = 2048*2
scalarEncoder = ScalarEncoder(par)

dateEncoder = DateEncoder(timeOfDay=(7,4), weekend=3, dayOfWeek=7)
encodingWidth = (dateEncoder.size + scalarEncoder.size)
enc_info = Metrics([encodingWidth], 999999999)

spParams = parameters["sp"]

sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        potentialRadius = spParams["potentialRadius"],
        globalInhibition = spParams["globalInhibition"],
        columnDimensions=spParams["columnDimensions"])

sp_info = Metrics(sp.getColumnDimensions(), 999999999)

tmParams = parameters["tm"]

# tm = TemporalMemory(
#         columnDimensions=tmParams["columnDimensions"])
tm = TemporalMemory(
        columnDimensions=tmParams["columnDimensions"],
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=tmParams["connectedPermanence"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
        )

tm_info = Metrics([tm.numberOfCells()], 999999999)

step = 5
# setup likelihood, these settings are used in NAB
anParams = parameters["anomaly"]["likelihood"]
if anParams.get("learningPeriod") is None:
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * df.shape[0]))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
else:
    learningPeriod = anParams["learningPeriod"]
anomaly_history = AnomalyLikelihood(learningPeriod=learningPeriod,
                                    estimationSamples=anParams["estimationSamples"],
                                    reestimationPeriod=anParams["reestimationPeriod"],
                                    historicWindowSize=anParams["historicWindowSize"])

predictor = Predictor(steps=[1, step], alpha=parameters["predictor"]['sdrc_alpha'])
predictor_resolution = 0.1

inputs = []
anomaly = []
anomalyLikelihood = []
log_anomalyLikelihood = []
predictions = {1: [], step: []}
for count, record in df.iterrows():

    dateBits = dateEncoder.encode(record.date)
    consumptionBits = scalarEncoder.encode(record.y)

    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
    enc_info.addData(encoding)

    # Create an SDR to represent active columns, This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(sp.getColumnDimensions())

    # Execute Spatial Pooling algorithm over input space.
    overlaps = sp.compute(encoding, True, activeColumns)
        
    sp_info.addData(activeColumns)
    
    # Execute Temporal Memory algorithm over active mini-columns.
    tm.compute(activeColumns, learn=True)
    tm_info.addData(tm.getActiveCells().flatten())

    # Predict what will happen, and then train the predictor based on what just happened.
    pdf = predictor.infer(tm.getActiveCells())
    for n in (1, step):
        if pdf[n]:
            predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
        else:
            predictions[n].append(float('nan'))

    anomaly_Likelihood = anomaly_history.anomalyProbability(record.y, tm.anomaly)
    anomaly.append(tm.anomaly)
    logAnomalyLikelihood = np.log(1.0000000001 - anomaly_Likelihood) / -23.02585084720009
    anomalyLikelihood.append(anomaly_Likelihood)
    log_anomalyLikelihood.append(logAnomalyLikelihood)
    
    predictor.learn(count, tm.getActiveCells(), int(record.y / predictor_resolution))

# Print information & statistics about the state of the HTM.
print("Encoded Input", enc_info)
print("")
print("Spatial Pooler Mini-Columns", sp_info)
print(str(sp))
print("")
print("Temporal Memory Cells", tm_info)
print(str(tm))
print("")

THRESHOLD_LIKELIHOOD = 0.3
THRESHOLD_RAW_SCORE = 0.9
df['loglikelihood_anomaly'] = log_anomalyLikelihood
df['raw_anomaly_score'] = anomaly

# Shift the predictions so that they are aligned with the input they predict.
for n_steps, pred_list in predictions.items():
    for i in range(n_steps):
        pred_list.insert(0, float('nan'))
        pred_list.pop()

# Calculate the predictive accuracy, Root-Mean-Squared
accuracy = {1: 0, step: 0}
accuracy_samples = {1: 0, step: 0}

for idx, inp in enumerate(df.y):
    for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
        val = predictions[n][idx]
        if not np.isnan(val):
            accuracy[n] += (inp - val) ** 2
            accuracy_samples[n] += 1
for n in sorted(predictions):
    accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
    print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])
print("Random guess, mean temperature:")
print(mean_squared_error(df.y, [np.mean(df.y)]*df.y.shape[0])**0.5)

df_t = df[(df.raw_anomaly_score >=THRESHOLD_RAW_SCORE)] # (df.loglikelihood_anomaly >=THRESHOLD_LIKELIHOOD) | 

fig = make_subplots(specs=[[{"secondary_y": True}]])
    
fig.add_trace(go.Scatter(x=df.index, y=df.y, name='Sinus'), secondary_y=False)

fig.add_trace(go.Scatter(x=df_t.index, y=df_t.y, mode='markers', name='Anomaly'), secondary_y=False)

fig.add_trace(go.Scatter(x=df.index, y=predictions[1], name='prediction one step ahead'), secondary_y=False)

fig.add_trace(go.Scatter(x=df.index, y=predictions[step], name=f'prediction {step} step ahead'), secondary_y=False)

fig.add_trace(go.Scatter(x=df.index, y=anomaly, name='Anomaly score TM'), secondary_y=True)

fig.add_trace(go.Scatter(x=df.index, y=anomalyLikelihood, name='Anomaly Likelihood'), secondary_y=True)

fig.add_trace(go.Scatter(x=df.index, y=log_anomalyLikelihood, name='Log Likelihood', line_color='#ffe476'), secondary_y=True)

fig.add_trace(go.Scatter(x=df.index, y=np.array([THRESHOLD_LIKELIHOOD]*df.shape[0]), name='Threshold_Likelihood'), secondary_y=True)
fig.add_trace(go.Scatter(x=df.index, y=np.array([THRESHOLD_RAW_SCORE]*df.shape[0]), name='Threshold_Raw_score'), secondary_y=True)


fig.update_layout(autosize=False, width=1000, height=500)
fig.update_yaxes(
    title_text = "Sinus",
    title_standoff = 25,
    secondary_y=False)

fig.update_yaxes(
    title_text = "Anomaly score",
    title_standoff = 25,
    secondary_y=True)

for i in [[3700,3800], [4100,4150], [6000,6100], [6800,7000]]:
    fig.add_vrect(
        x0=i[0], x1=i[1],
        fillcolor="LightSalmon", opacity=0.5,
        layer="below", line_width=0,
    )

fig.show()

NameError: name 'date' is not defined