In [None]:
# Debugging aid: IPython magic that opens a Qt console attached to this
# notebook's kernel, so variables can be inspected while cells run.
%qtconsole

In [None]:
# the required python libraries imported
import bnpy
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import time
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, push_notebook
from bokeh.core.validation import silence
from bokeh.core.validation.warnings import MISSING_RENDERERS
from bokeh.layouts import column
from IPython.core.display import display, HTML
import bokeh
bokeh.io.reset_output()
bokeh.io.output_notebook()

# indicates to jupyer how the plots are to be displayed and sized and some other
# housekeeping particular to this notebook
display(HTML("<style>div.output_scroll { height: 600em; }</style>"))
silence(MISSING_RENDERERS, True)
%matplotlib inline
plt.rcParams['figure.figsize'] = [15, 3]

# Streaming configuration: offset of the first streamed row, number of leading
# rows used to fit the initial model, and number of rows per batch.
data_start = 0
data_init_size = 20000
batch_size = 2000
batchnum = data_init_size // batch_size  # batches contained in the initial window

# Read the series and discard the first CSV column
# (presumably a saved index column -- confirm against the data file).
all_data = pd.read_csv('../data/anomaly0245.csv')
all_data = all_data.drop(columns=all_data.columns[0])

# bnpy datasets: the first `data_init_size` rows for the initial fit, and the
# complete series for slicing out batches later.
init_data = bnpy.data.XData.from_dataframe(all_data.head(data_init_size))
data_set = bnpy.data.XData.from_dataframe(all_data)

# Split the series into consecutive, non-overlapping batches of exactly
# `batch_size` rows, each wrapped as a bnpy XData object.  Only full batches
# are produced; a trailing partial batch is dropped.
#
# The `+ 1` in the stop value keeps the final full batch when len(all_data) is
# an exact multiple of batch_size (the earlier `i < len(all_data) - batch_size`
# test silently dropped it).  If the series is shorter than one batch the range
# is empty and `batches` is an empty list.
batches = [
    bnpy.data.XData.from_dataframe(all_data.iloc[batch_start:batch_start + batch_size])
    for batch_start in range(0, len(all_data) - batch_size + 1, batch_size)
]

# p = figure(title="Streaming Data", x_axis_label='x', y_axis_label='y', plot_height=350, plot_width=1200)

# add a line renderer with legend and line thickness
# p.line(all_data.index.tolist(), all_data['anomaly'].tolist(), legend_label="Temp.", line_width=2)

# show the results
# show(p)

In [None]:
# Bookkeeping placeholders for the streaming run.
chain = 0
x = []
eng_val = []
states = []
num_states = []
step = 5000

# Settings for the initial (cold-start) fit with a DiagGauss observation model.
gamma = 1.0
sF = 1.0
K = 25  # number of components the model is initialised with
nLap = 10

# Fit the initial model on the first `data_init_size` rows.
# NOTE(review): the "K=10" in output_path is a fixed directory label; it does
# not follow the value of K set above.
cold_start_options = dict(
    output_path='/tmp/AsteriskK8/coldstart-K=10/',
    nLap=nLap, nTask=1, nBatch=batchnum, convergeThr=0.0001,
    gamma0=gamma, sF=sF, ECovMat='eye',
    K=K, initname='randexamplesbydist', ts=True, debug=False,
)
cold_start_model, cold_info_dict = bnpy.run(
    init_data, 'DPMixtureModel', 'DiagGauss', 'memoVB', **cold_start_options)

# Initial plotting series: the training values against their row positions.
y = np.squeeze(init_data.X)
x = list(range(len(init_data.X)))

# Independent empty lists used as starting values by the streaming loop.
x_batches, x_batch_post, x_batch_pre, K_model, K_states, index = (
    [] for _ in range(6))

# The warm-start handles initially refer to the cold-start results.
warm_start_model, warm_info_dict = cold_start_model, cold_info_dict

In [None]:
# Display the attribute dictionary of the observation model's posterior
# parameter object (use `.obsModel.Prior` instead to look at the prior).
warm_start_model.obsModel.Post.__dict__

In [None]:
# Reset Bokeh's output state and load BokehJS into the notebook once.
# (`bokeh.io.output_notebook` and the imported `output_notebook` are the same
# function, so calling both back to back was redundant.)
bokeh.io.reset_output()
bokeh.io.output_notebook()

# One short, wide panel per quantity plotted by the streaming loop.  The names
# p1..p6 are kept because the loop cell adds glyphs to them directly.
panel_titles = ["Dataset", "Sufficient Statistics", "K1", "K2", "K3", "K"]
p1, p2, p3, p4, p5, p6 = (
    figure(title=panel_title, plot_height=100, plot_width=1200)
    for panel_title in panel_titles
)

# Stack the panels vertically.  `notebook_handle=True` returns a handle that
# is later passed to push_notebook() to refresh this rendered output in place.
p = column(p1, p2, p3, p4, p5, p6)
target = show(p, notebook_handle=True)


In [None]:
# Streaming warm-start loop.
#
# For every batch: score it with the current model, push plots to the Bokeh
# handle, refit the model on that batch's rows starting from the previous run's
# saved output, then recompute statistics with the refitted model and stash
# the values that the NEXT iteration will plot.

# NOTE(review): st_idx and ed_idx are computed but not used by the loop below,
# which iterates over all batches starting at index 0.
st_idx = int(data_init_size/batch_size)
ed_idx = int(data_init_size/batch_size) + 200
# Wall-clock duration of the previous iteration (0 before the first one).
elapsed = 0
for ii, abatch in enumerate(batches):
    start = time.time()
    # Local parameters of this batch under the model from the previous
    # iteration (the cold-start model on the first pass).
    LP = warm_start_model.calc_local_params(abatch)
    # Columns 0, 1 and 2 of LP["resp"], one per "K1"/"K2"/"K3" panel
    # (presumably per-example responsibilities of the first three components).
    K_resp = [LP["resp"][:,0], LP["resp"][:,1], LP["resp"][:,2]]
    # Add glyphs to the figures and push the update to the displayed layout.
    # x, y, index, x_batch_post, K_model and elapsed still hold the values set
    # at the end of the previous iteration (or by the setup cell on the first
    # pass).  Each pass adds new renderers to the figures.
    # NOTE(review): several renderers share the name 'g4', and from the second
    # pass on `index` is a single integer rather than a sequence.
    line1 = p1.line(x = x, y = y, color='blue', name='g1', line_width=1)
    line2 = p2.scatter(x = index, y = x_batch_post , color='red', name='g2')
    line4 = p3.scatter(x = index, y = K_resp[0] , color='red', name='g4')
    line5 = p4.scatter(x = index, y = K_resp[1] , color='red', name='g4')
    line6 = p5.scatter(x = index, y = K_resp[2] , color='red', name='g4')
    line7 = p6.scatter(x = index, y = K_model , color='red', name='g4')
    line8 = p6.scatter(x = index, y = elapsed , color='red', name='g4')
    push_notebook(handle = target)

    # Select rows [start_idx, end_idx) of the full dataset: the ii-th window
    # of batch_size rows, offset by data_start.
    start_idx = data_start + (ii*batch_size)
    end_idx = data_start + ((ii+1)*batch_size)
    new_dataset = data_set.make_subset(example_id_list = list(range(start_idx, end_idx)))

    # Sufficient statistics of this batch under the previously learned model.
    LPanomaly = []
    SSanomaly = []
    LPanomaly.append(LP)  # local params computed above; the list is not read again in this cell
    SSanomaly.append(warm_start_model.get_global_suff_stats(abatch, LP))  # suff stats for the new data
    x_batch_pre = []
    xx_batch_pre = []
    # Collect the `x` and `xx` fields of each suff-stat object (only one here).
    for key in SSanomaly:
        x_batch_pre.append(key.x)
        xx_batch_pre.append(key.xx)
    x_batch_pre = np.vstack(x_batch_pre)
    xx_batch_pre = np.vstack(xx_batch_pre)

    # Refit on the new window, writing to a per-batch output directory.
    # NOTE(review): the "K=10" in the path is a fixed label, not the value of K.
    output_path = f'/tmp/AsteriskK8/warmstart-K=10/b{ii}'  # one output directory per batch
    # Initialise from the saved output of the previous run: the cold start for
    # the first batch, the previous warm start afterwards.
    if ii == 0:  # first batch: use the cold-start output
        warm_init_path = cold_info_dict['task_output_path']
    else:  # later batches: use the previous warm-start output
        warm_init_path = warm_info_dict['task_output_path']
    warm_start_model, warm_info_dict = bnpy.run(
        new_dataset, 'DPMixtureModel', 'DiagGauss', 'memoVB',
        output_path=output_path,
        nLap=nLap, nTask=1, nBatch=batchnum, convergeThr=0.0001,
        gamma0=gamma, sF=sF, ECovMat='eye',
        K=K, initname=warm_init_path, ts=True, debug=True)
    # Sufficient statistics of the same batch under the newly learned model.
    LPanomaly = []
    SSanomaly = []
    LP = warm_start_model.calc_local_params(abatch)
    LPanomaly.append(LP)  # local params under the refitted model
    SSanomaly.append(warm_start_model.get_global_suff_stats(abatch, LP))  # suff stats for the new data
    x_batch_post = []
    xx_batch_post = []
    K_model = []
    K_states = []  # NOTE(review): reset here but never filled in this cell
    for key in SSanomaly:
        x_batch_post.append(key.x)
        xx_batch_post.append(key.xx)
        K_model.append(key.K)
    x_batch_post = np.vstack(x_batch_post)
    xx_batch_post = np.vstack(xx_batch_post)
    # NOTE(review): this stacked K_model is overwritten a few lines below.
    K_model = np.vstack(K_model)

    # Values carried into the next iteration's plots.
    index = int(ii * batch_size)  # row offset of this batch
    # Drop singleton dimensions (the second squeeze is a no-op).
    x_batch_pre = np.squeeze(np.squeeze(x_batch_pre))
    x_batch_post = np.squeeze(np.squeeze(x_batch_post))
    # Number of entries of the post-fit `x` statistic that exceed 1.
    K_model = np.sum(x_batch_post > 1)
    # This batch's values and their global row positions, for the "Dataset" panel.
    y = np.squeeze(abatch.X)
    x = list(range(ii*len(abatch.X), ii*len(abatch.X) + len(abatch.X)))
    end = time.time()
    # Duration of the whole iteration, including plotting and refitting.
    elapsed = end - start
