In [None]:
cd ..

In [None]:
import os
import pandas as pd
import torch
import plotly.graph_objects as go
import yaml
from plotly.subplots import make_subplots
import numpy as np
from ipywidgets import interact
import json
from sklearn.preprocessing import StandardScaler
from typing import Optional
import time
from sympy import *
import numpy as np

from seqcat_datamodule import Dataset
from seqcat_catvae import seqcat_vae
from seq_vae import seq_vae
from discret2dive_utils import load_model, assosciation_rule_mining, create_dict, save_files
from preprocessing_rules import getHealthStates
from diagnoser import Diag_solver
from translate import parse_file

# Seed NumPy's and PyTorch's global random number generators with one shared value
# so that repeated runs draw the same random numbers.
SEED = 123
np.random.seed(SEED)
# torch.manual_seed returns a Generator object; the trailing ';' keeps its repr out
# of the cell output.
torch.manual_seed(SEED);

In [None]:
def _latest_checkpoint(model_dir):
    """Return the path of the last (by sorted file name) entry in ``./<model_dir>/checkpoints/``."""
    ckpt_dir = f'./{model_dir}/checkpoints/'
    # os.listdir returns entries in arbitrary order; sort so the choice is deterministic.
    ckpt_names = sorted(os.listdir(ckpt_dir))
    if not ckpt_names:
        raise FileNotFoundError(f'no checkpoint files found in {ckpt_dir}')
    return f'{ckpt_dir}{ckpt_names[-1]}'


def _load_scaled_data(anomaly):
    """Read the CSV for ``anomaly`` and return ``(scaled_signals, real_category)``.

    The first three CSV columns are standardised and cut to the evaluated row range;
    the fourth column (the recorded category label) is cut to its own row range.
    """
    data_dir = 'preprocessed_data/tank_simulation'
    if anomaly == 'norm':
        # fault-free run: the scaler is fitted on the very same file
        raw = pd.read_csv(f'{data_dir}/norm_long.csv').reset_index(drop=True)
        signals = raw.iloc[:, :3]
        real_cat = raw.iloc[10:2000, 3].reset_index(drop=True)
        scaler = StandardScaler().fit(signals)
        evaluated_rows = slice(None, 2000)
    else:
        # faulty run: the scaler is fitted on rows 1000+ of the separate norm.csv
        raw = pd.read_csv(f'{data_dir}/{anomaly}_long_faulty.csv').reset_index(drop=True)
        signals = raw.iloc[:, :3]
        real_cat = raw.iloc[1510:2500, 3].reset_index(drop=True)
        reference = pd.read_csv(f'{data_dir}/norm.csv').iloc[1000:, :3].reset_index(drop=True)
        scaler = StandardScaler().fit(reference)
        evaluated_rows = slice(1500, 2500)
    scaled = pd.DataFrame(scaler.transform(signals), columns=signals.columns, index=signals.index)
    return scaled.iloc[evaluated_rows, :].reset_index(drop=True), real_cat


def _mode_string(category, n_modes=12):
    """Encode ``category`` as a string of ``n_modes`` characters: 'b' at its position, 'a' elsewhere."""
    one_hot = np.zeros(n_modes, dtype=int)
    one_hot[category] = 1
    return ''.join('b' if flag == 1 else 'a' for flag in one_hot)


def _expected_successor(automaton, state):
    """Return the first successor listed in ``automaton`` for ``state`` as an int."""
    successors = automaton.get(str(state))
    if not successors:
        # The original code failed here with an opaque AttributeError / IndexError.
        raise ValueError(f'automaton defines no outgoing transition for state {state}')
    return int(list(successors.keys())[0])


def plot_diagnosis(anomaly):
    """Run both VAEs over one tank-simulation data set, diagnose it and plot the result.

    For every window of the data set the function
      * computes the seq-VAE reconstruction loss and the cat-VAE category / likelihood,
      * checks the category transition against the automaton read from
        'preprocessed_data/automaton.json',
      * records a symptom string whenever the reconstruction loss is below the
        diagnosis threshold or the transition is not listed in the automaton,
      * feeds the distinct symptoms among the last 100 recorded ones to ``Diag_solver``
        and checks whether the returned diagnosis contains ``anomaly.split('_')[0]``.
    Finally a 7-row plotly figure is shown.

    Args:
        anomaly: 'norm' for the fault-free data set, otherwise the prefix of a
            '<anomaly>_long_faulty.csv' file in 'preprocessed_data/tank_simulation'.

    Returns:
        A list with one dict per evaluated window: ``{'Index': i, 'Diag_Correct': c}``
        where c is 1 (diagnosis contains the expected name), -1 (it does not) or
        0 (no symptom has been recorded up to that window).

    Raises:
        FileNotFoundError: if a checkpoint directory is empty.
        ValueError: if the data set has too few windows, or the automaton has no
            successor for a state whose transition was found invalid.

    Note:
        Model inputs are moved to 'cuda', so a CUDA device is required.
    """
    device = 'cuda'
    n_modes = 12        # length of the symptom string
    diag_history = 100  # number of most recent symptoms handed to the diagnoser
    # NOTE(review): two different seq-VAE thresholds are in use - one for the plotted
    # anomaly indicator, one for the diagnosis decision. Confirm this is intended.
    seqvae_plot_threshold = -170
    seqvae_diag_threshold = -160

    # specify path where to find the diagnosis rules
    rule_path = 'tank_rule_complete'
    rules = parse_file(f'diagnosis/{rule_path}.txt', is_sympy=False)

    # load trained categorical VAE
    catvae_dir = 'VAE_training_hparams/tank_discret2deepdive'
    with open(f'./{catvae_dir}/hparams.yaml') as f:
        hparam = yaml.safe_load(f)
    model = seqcat_vae.load_from_checkpoint(_latest_checkpoint(catvae_dir), hparams=hparam["hparams"])

    # load trained sequence VAE (receives the whole yaml content as hparams)
    seqvae_dir = 'VAE_training_hparams/tank/seq_vae'
    with open(f'./{seqvae_dir}/hparams.yaml') as f:
        hparam_seq = yaml.safe_load(f)
    model_seq = seq_vae.load_from_checkpoint(checkpoint_path=_latest_checkpoint(seqvae_dir), hparams=hparam_seq)

    # read and scale the data
    df_csv_sc, df_csv_realcat = _load_scaled_data(anomaly)
    faulty_idx = df_csv_realcat.str.contains('faulty').astype(int)
    dataset = Dataset(dataframe=df_csv_sc.iloc[:, :3].reset_index(drop=True),
                      number_timesteps=hparam["hparams"]["NUMBER_TIMESTEPS"])
    if len(dataset) < 3:
        raise ValueError(f'data set yields only {len(dataset)} windows; at least 3 are needed')

    # load automaton
    with open('preprocessed_data/automaton.json', 'r') as json_file:
        automaton = json.load(json_file)

    # compute initial discretization
    *_, z = model.get_states(dataset[0].unsqueeze(0).to(device))
    prev_cat = int(z.argmax().item())

    # NOTE(review): for names without '_' (e.g. 'q1short1s') this is the whole name;
    # confirm the diagnoser reports items under that name.
    expected_item = anomaly.split('_')[0]

    all_cats = []
    all_like = []
    all_residuals = []
    cat_diag = []
    diag_true = []
    symptom_history = []  # symptom strings only, i.e. windows without a symptom are skipped
    # Only re-evaluated when the category changes, so an invalid verdict persists
    # until a later change is found valid.
    transition_valid = True
    for i in range(1, len(dataset) - 1):
        batch = dataset[i].unsqueeze(0).to(device)

        _, seq_out = model_seq(batch)
        recon_loss = seq_out['recon_loss'].flatten().detach().cpu().numpy()
        all_residuals.extend(recon_loss)

        _, pzx, _, _, _, z = model.get_states(batch)
        # NOTE(review): the KL value is not used anywhere. The call is kept only so
        # that the sequence of model calls is unchanged; drop it if it is side-effect free.
        model.kl_divergence(pzx=pzx)

        # computing actual category
        actual_cat = int(z.argmax().item())
        like = model.function_likelihood(x=batch).mean()

        # a transition happened: check whether the automaton lists it
        if prev_cat != actual_cat:
            transition_valid = str(actual_cat) in automaton.get(str(prev_cat), {})

        # combine the seq-VAE anomaly flag with the transition check
        if recon_loss[0] < seqvae_diag_threshold and transition_valid:
            diagnosed_cat = actual_cat
            symptom = _mode_string(diagnosed_cat, n_modes)
            prev_cat = actual_cat
        elif not transition_valid:
            # report the first successor the automaton lists for the previous
            # category; prev_cat is deliberately left unchanged in this branch
            diagnosed_cat = _expected_successor(automaton, prev_cat)
            symptom = _mode_string(diagnosed_cat, n_modes)
        else:
            diagnosed_cat = -1
            symptom = -1
            prev_cat = actual_cat
        cat_diag.append(diagnosed_cat)
        if symptom != -1:
            symptom_history.append(symptom)

        all_cats.append(z.detach().cpu().numpy().astype(int))
        all_like.append(like.detach().cpu().numpy())

        # making diagnosis
        if not symptom_history:
            diag_true.append({'Index': i, 'Diag_Correct': 0})
            continue
        # distinct symptoms among the most recent ones, in order of first appearance
        fault_states = dict.fromkeys(symptom_history[-diag_history:], False)
        rules_health_states = getHealthStates(rules=rules, faultStates=fault_states)
        diag_model = Diag_solver(rules=rules, health_dict=rules_health_states)
        # solve once and reuse the result (the original solved twice per window)
        diag_diagnosis, *_ = diag_model.solve()
        # check whether the expected item is part of the diagnosis; plotted below
        diagnosis_hit = any(expected_item == str(item) for sublist in diag_diagnosis for item in sublist)
        diag_true.append({'Index': i, 'Diag_Correct': 1 if diagnosis_hit else -1})

    all_residuals = np.array(all_residuals)
    anom_labels = np.where(all_residuals < seqvae_plot_threshold, .5, 0)
    cats = pd.DataFrame(np.vstack(all_cats)).idxmax(axis=1)
    likelihood = pd.DataFrame(all_like).iloc[:, 0]
    diag_correct = [entry['Diag_Correct'] for entry in diag_true]

    x_axis = df_csv_sc.index
    fig = make_subplots(rows=7, cols=1, shared_xaxes=True)
    for i in range(0, 3):
        column = df_csv_sc.columns[i]
        fig.add_trace(go.Scatter(x=x_axis, y=df_csv_sc[column], name=column, mode='markers'), row=1, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=cats, name='discretized category', mode='lines'), row=2, col=1)
    fig.add_trace(go.Scatter(x=df_csv_realcat.index, y=df_csv_realcat.values, name='real category', mode='markers'), row=3, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=all_residuals, name='residual', mode='markers'), row=4, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=likelihood, mode='lines', name='likelihood'), row=5, col=1)

    fig.add_trace(go.Scatter(x=x_axis, y=faulty_idx, mode='lines', name='induced anomaly'), row=6, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=anom_labels, mode='lines', name='anomaly indicator'), row=6, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=cat_diag, mode='lines', name='diagosed category'), row=7, col=1)
    fig.add_trace(go.Scatter(x=x_axis, y=diag_correct, mode='lines', name='true diagnosis? 1-ok, -1-notok'), row=7, col=1)

    fig.update_layout(title_text=anomaly)
    fig.show()
    return diag_true


In [None]:
# Let the user choose one of the simulated scenarios; plot_diagnosis is called with
# the selected entry.
# NOTE(review): `diag_true` receives whatever `interact` returns, which is presumably
# the wrapped function rather than the list returned by plot_diagnosis - confirm
# before using it as diagnosis data.
diag_true = interact(
    plot_diagnosis,
    anomaly=[
        'v3_50s',
        'q1_50s',
        'v12_50s',
        'v23_50s',
        'q1short1s',
        'v12short1s',
        'v23short1s',
        'v3short1s',
        'q1_100s',
        'v12_100s',
        'v23_100s',
        'v3_100s',
        'norm',
    ],
)
