# Evaluation of CatVAE discretization performance with preprocessed BeRfiPl Dataset
Visual evaluation of how precisely the CatVAE discretizes the data and how meaningful the learned categories are compared to the original states. <br>
In the first subplot, we plot a selection of measurement values from the dataset. <br>
The second subplot shows the discretizations learned by the CatVAE, i.e. the index of the active category, plotted over the same x-axis. <br>
The third subplot shows the log-likelihood of the CatVAE for the input data. <br>

To make a statement about the change in states and likelihood, we first plot one code cell with nominal system behavior, followed by one with anomalous system behavior.

In [None]:
# Switch to the parent directory: the relative imports and data/checkpoint paths used in this notebook are resolved from there.
cd ..

In [None]:
import os
import pandas as pd
import torch
import plotly.graph_objects as go
import yaml
from plotly.subplots import make_subplots

from utils import standardize_data
from datamodule import Dataset
from catvae import CategoricalVAE

In [None]:
def plot_like(anom, n_signals=5):
    """Plot measurements, learned categories and likelihood for one BeRfiPl dataset.

    Loads the trained CatVAE from ``./CatVAE_training_hparams/BeRfiPl``, reads
    either the nominal (``ds1n.csv``) or the anomalous (``ds1c.csv``)
    preprocessed dataset, standardizes it with ``scaler_BeRfiPl.pkl`` and
    builds a figure with three stacked subplots sharing the x-axis:

    1. the first ``n_signals`` columns of the raw CSV,
    2. the index of the category with the largest logit per sample,
    3. the model likelihood, smoothed with a rolling median (window 10).

    Args:
        anom: Falsy selects the nominal dataset, truthy the anomalous one.
        n_signals: Maximum number of CSV columns drawn in the first subplot.
            Fewer are drawn if the CSV has fewer columns.

    Returns:
        The plotly figure containing the three subplots.

    Raises:
        FileNotFoundError: If the checkpoint directory contains no entries.
    """
    model_version = 'CatVAE_training_hparams/BeRfiPl'
    # Fall back to the CPU so the notebook also runs on machines without a GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # os.listdir returns entries in arbitrary order; sort so that the selected
    # checkpoint is deterministic (lexicographically last file name).
    ckpt_dir = f'./{model_version}/checkpoints/'
    ckpt_names = sorted(os.listdir(ckpt_dir))
    if not ckpt_names:
        raise FileNotFoundError(f'no checkpoint found in {ckpt_dir}')
    ckpt_file_path = f'{ckpt_dir}{ckpt_names[-1]}'

    with open(f'./{model_version}/hparams.yaml') as f:
        hparam = yaml.safe_load(f)
    # NOTE(review): this assumes CategoricalVAE is a Lightning module, so that
    # load_from_checkpoint is a classmethod (recent Lightning versions reject
    # calling it on an instance) -- confirm against catvae.py. The
    # hyperparameters from hparams.yaml are passed along instead of building a
    # throwaway model first.
    model = CategoricalVAE.load_from_checkpoint(
        ckpt_file_path, hparams=hparam["hparams"]
    ).to(device)

    csv_name = 'ds1c.csv' if anom else 'ds1n.csv'
    df_csv = pd.read_csv(f'preprocessed_data/BeRfiPl/{csv_name}', index_col=0).reset_index(drop=True)
    df_sc = standardize_data(df_csv, 'scaler_BeRfiPl.pkl')
    df = Dataset(dataframe=df_sc)[:][0:]

    # Build the input tensor once and reuse it for both model calls.
    inputs = torch.tensor(df).to(device=device)

    # Pure inference: no gradients are needed for plotting.
    with torch.no_grad():
        raw_likelihood = model.function_likelihood(inputs)
        # get_states returns (pzx_logits, pzx, mu, sigma, pxz, z); only the
        # logits are needed to determine the active category.
        pzx_logits = model.get_states(inputs)[0]

    # Rolling median smooths the likelihood; the first window-1 values are NaN
    # and are back-filled with the first valid value.
    likelihood = (
        pd.DataFrame(raw_likelihood.cpu().detach().numpy())
        .rolling(10)
        .median()
        .bfill()
    )
    # Index of the largest logit per row; equivalent to one-hot encoding the
    # argmax and reading the position of the 1 back out.
    categories = torch.argmax(pzx_logits, dim=1).cpu().numpy()

    x_values = df_csv.index
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True)
    for column in df_csv.columns[:n_signals]:
        fig.add_trace(
            go.Scatter(x=x_values, y=df_csv[column], name=column, mode='markers'),
            row=1, col=1,
        )
    fig.add_trace(
        go.Scatter(x=x_values, y=categories, name='category', mode='lines'),
        row=2, col=1,
    )
    fig.add_trace(
        go.Scatter(x=x_values, y=likelihood[likelihood.columns[0]], mode='markers'),
        row=3, col=1,
    )
    return fig


In [None]:
# Plot the nominal dataset (anom=False) as the reference for normal system behavior.
plot_like(anom=False)

In [None]:
# Plot the anomalous dataset (anom=True). Anomalies can be seen e.g. between
# idx 52 and 297, and they get worse from idx 3185 to 3395.
plot_like(anom=True)