In [None]:
# Optional debugging aid: opens a Qt console attached to this notebook's kernel.
%qtconsole

In [None]:
# Import the required Python libraries.
import bnpy
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import time
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, push_notebook
from bokeh.core.validation import silence
from bokeh.core.validation.warnings import MISSING_RENDERERS
from bokeh.layouts import column
from IPython.core.display import display, HTML
import bokeh
# Drop any previously configured Bokeh output target, then render plots inline
# in the notebook.
bokeh.io.reset_output()
output_notebook()

In [None]:
def run_bnp_anomaly(windows, batch_size, window_size_in_batches):
    """Score consecutive sliding windows with a warm-started bnpy DP mixture model.

    Every window is fitted with ``bnpy.run`` (``DPMixtureModel`` / ``DiagGauss`` /
    ``memoVB``). The first fit is initialised with ``'randexamples'``; each later
    fit is initialised from the ``task_output_path`` reported by the previous
    fit. The fitted model then scores the newest batch of the window, and that
    score is pushed into a rolling buffer holding the last
    ``window_size_in_batches`` scores, from which an entropy value is derived.

    Parameters
    ----------
    windows : iterable
        Windows to process, in order. Each item is handed to ``bnpy.run`` and
        must provide ``make_subset`` (this notebook builds them with
        ``bnpy.data.XData.from_dataframe``). Each window is assumed to hold
        ``batch_size * window_size_in_batches`` observations.
    batch_size : int
        Number of observations in one batch.
    window_size_in_batches : int
        Number of batches in one window; also the length of the rolling score
        buffer.

    Returns
    -------
    pandas.DataFrame
        One row per processed window with the columns ``idx`` (index one past
        the window's last observation), ``LL`` (score of the newest batch) and
        ``entropy``. ``entropy`` is NaN until the rolling buffer has been filled
        with real scores. An empty frame with the same columns is returned when
        ``windows`` is empty.
    """
    gamma = 1.0
    sF = 1.0
    K = 25  # Initial K handed to bnpy; the original note says it caps how many components can develop
    nLap = 10
    iname = 'randexamples'
    # Output locations are kept verbatim from the original notebook; the "K=10"
    # in them is only a label and does not follow the value of K above.
    opath = '/tmp/bnp-anomaly/coldstart-K=10/b0'

    window_len = batch_size * window_size_in_batches
    # Rolling buffer of the most recent scores, NaN-padded until it fills up.
    ll = [np.nan] * window_size_in_batches
    records = []

    for ii, window in enumerate(windows):
        warm_start_model, warm_info_dict = bnpy.run(
            window, 'DPMixtureModel', 'DiagGauss', 'memoVB',
            output_path=opath,
            nLap=nLap, nTask=1, nBatch=window_size_in_batches, convergeThr=0.0001,
            gamma0=gamma, sF=sF, ECovMat='eye',
            K=K,
            moves='birth,merge,delete,shuffle',
            initname=iname,
            ts=True, debug=False, verbose=0)
        # Warm start: the next window is initialised from this fit's output.
        iname = warm_info_dict['task_output_path']
        opath = f'/tmp/AsteriskK8/warmstart-K=10/b{ii + 1}'

        # Newest batch = the last `batch_size` observations of the window.
        # (The original range started one position earlier, selecting
        # batch_size + 1 rows and index -1 when window_size_in_batches == 1.)
        newest_ids = list(range(window_len - batch_size, window_len))
        batch = window.make_subset(newest_ids)

        LP = warm_start_model.calc_local_params(batch)
        SS = warm_start_model.get_global_suff_stats(batch, LP)
        LL = warm_start_model.calcLogLikCollapsedSamplerState(SS)

        ll.pop(0)
        ll.append(LL)
        total = sum(ll)  # NaN while the buffer still contains padding
        ll_normed = [score / total for score in ll]
        entropy = -sum(share * np.log(share) for share in ll_normed)

        index = ii * batch_size + window_len
        records.append({'idx': index, 'LL': LL, 'entropy': entropy})

    # Build the frame once instead of growing it row by row inside the loop.
    return pd.DataFrame(records, columns=['idx', 'LL', 'entropy'])

In [None]:
# CSV files that hold the test series, one series per file.
data_sets = [
    "../data/test/ds0.csv",
    "../data/test/ds1.csv",
    "../data/test/ds2.csv",
    "../data/test/ds3.csv",
    "../data/test/ds4.csv",
    "../data/test/ds5.csv",
    "../data/test/ds6.csv",
    "../data/test/ds7.csv",
]

# Read only column '0' of each file and keep its first 1000 rows.
data = [
    pd.read_csv(csv_path, usecols=['0']).head(1000)
    for csv_path in data_sets
]


In [None]:
# Optional visual check: draw one line chart per loaded data set.
for df in data:
    x_values = df.index.tolist()
    y_values = df['0'].tolist()
    p = figure(
        title="Streaming Data",
        x_axis_label='x',
        y_axis_label='y',
        plot_height=150,
        plot_width=800,
    )
    p.line(x_values, y_values, legend_label="Temp.", line_width=2)
    show(p)

In [None]:
# Build, for every data set, the list of overlapping sliding windows that
# run_bnp_anomaly consumes. Consecutive windows are shifted by one batch.
windows = []
batch_size = 5
window_size_in_batches = 5

# Number of observations covered by a single window.
window_len = window_size_in_batches * batch_size

for df in data:
    win = []
    i = 0
    # NOTE(review): the strict `<` stops one step early, so a window ending
    # exactly at len(df) is never produced - confirm whether `<=` was intended.
    while i * batch_size < (len(df) - window_len):
        start = i * batch_size
        w = df[start:start + window_len]
        win.append(bnpy.data.XData.from_dataframe(w))
        i += 1
    # Append once per data set. The original appended inside the while loop,
    # which added the same list to `windows` once for every window it held.
    windows.append(win)

In [None]:
# Run the detector on every data set's windows and save each result table
# as test_results_<i>.csv in the working directory.
test_results = []
for i, w in enumerate(windows):
    print(f"Running anomaly detection on data set {i} ({len(w)} windows)")
    result = run_bnp_anomaly(w, batch_size, window_size_in_batches)
    test_results.append(result)
    result.to_csv(f"test_results_{i}.csv")