In [None]:
cd ..

In [None]:
import os
import pandas as pd
import torch
import json
import yaml
import numpy as np
from aalpy.learning_algs import run_Alergia
from aalpy.utils import visualize_automaton

from seqcat_datamodule import Dataset
from seqcat_catvae import seqcat_vae
from seq_vae import seq_vae
from discret2dive import check_normal
from sklearn.preprocessing import StandardScaler

# Seed NumPy and PyTorch from one named constant so both generators start
# from the same, easily changed value on every fresh run.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)


In [None]:
def _last_checkpoint_path(model_dir):
    """Return the path of the lexicographically last entry in ``<model_dir>/checkpoints``."""
    ckpt_dir = os.path.join(model_dir, 'checkpoints')
    # os.listdir yields entries in arbitrary order, so sort before picking one;
    # otherwise the chosen checkpoint can differ between machines or runs.
    names = sorted(os.listdir(ckpt_dir))
    if not names:
        raise FileNotFoundError(f'no checkpoint files found in {ckpt_dir}')
    return os.path.join(ckpt_dir, names[-1])


def _category_transitions(state_rows):
    """Reduce stacked per-window state vectors to the rows where the category changes."""
    if not state_rows:
        raise ValueError('dataset produced no windows; cannot compute transitions')
    stacked = pd.DataFrame(np.vstack(state_rows))
    # The category of a window is the column holding the row maximum.
    cats = pd.DataFrame(stacked.idxmax(axis=1))
    state_col = cats.columns[0]
    cats['Prev_Value'] = cats[state_col].shift(1)
    cats['Change'] = cats[state_col] != cats['Prev_Value']
    # The first row has no predecessor (Prev_Value is NaN); it is flagged as a
    # change but removed again by dropna(), exactly as in the original code.
    return cats[cats['Change']].dropna()


def compute_transitions(idx):
    """Discretize the normal tank data with the trained categorical VAE and
    return the points at which the discrete category changes.

    Steps:
      1. Load ``seqcat_vae`` from ``./VAE_training_hparams/tank_discret2deepdive``
         (hyper-parameters from ``hparams.yaml``, weights from the checkpoints
         sub-folder).
      2. Read the first three columns of
         ``preprocessed_data/tank_simulation/norm_long.csv``, standardize them
         with a ``StandardScaler`` fitted on the full file, and keep the first
         2000 rows.
      3. Run every window of the resulting ``Dataset`` through
         ``model.get_states`` and collect the returned ``z`` (cast to int;
         presumably a one-hot category sample — TODO confirm).
      4. Keep only the rows whose category differs from the previous row.

    Args:
        idx: Unused. Kept only so existing calls such as
            ``compute_transitions(idx=1)`` keep working.

    Returns:
        pandas.DataFrame with three columns: ``0`` (category index of the
        window), ``'Prev_Value'`` (category of the preceding window) and
        ``'Change'`` (always True in the returned rows). The very first window
        is never included because it has no predecessor.

    Raises:
        FileNotFoundError: if the checkpoints folder contains no files.
        ValueError: if the dataset yields no windows.

    NOTE(review): the original body also loaded an unused ``seq_vae``
    checkpoint and computed unused KL / mu / fault-label values; that dead work
    was removed. If ``get_states`` samples stochastically, dropping the extra
    model construction may shift the torch RNG stream relative to earlier runs.
    NOTE(review): the model is used in whatever train/eval mode
    ``load_from_checkpoint`` returns — call ``model.eval()`` upstream if the
    network contains dropout or batch-norm layers.
    """
    model_dir = './VAE_training_hparams/tank_discret2deepdive'
    data_path = 'preprocessed_data/tank_simulation/norm_long.csv'
    n_signal_columns = 3
    n_rows = 2000

    # load trained model
    with open(os.path.join(model_dir, 'hparams.yaml')) as f:
        hparam = yaml.safe_load(f)
    model = seqcat_vae.load_from_checkpoint(_last_checkpoint_path(model_dir), hparams=hparam["hparams"])
    # Feed inputs on whatever device the weights live on instead of assuming CUDA.
    device = next(model.parameters()).device

    # read normal data (the CSV is parsed once and reused for fitting and scaling)
    signals = pd.read_csv(data_path).iloc[:, :n_signal_columns].reset_index(drop=True)
    scaler = StandardScaler().fit(signals)
    signals_scaled = pd.DataFrame(
        scaler.transform(signals), columns=signals.columns, index=signals.index
    ).iloc[:n_rows, :].reset_index(drop=True)
    dataset = Dataset(dataframe=signals_scaled, number_timesteps=hparam["hparams"]["NUMBER_TIMESTEPS"])

    # compute discretized categories; no gradients are needed for inference
    state_rows = []
    with torch.no_grad():
        for window in dataset:
            _, _, _, _, _, z = model.get_states(window.unsqueeze(0).to(device))
            state_rows.append(z.detach().cpu().numpy().astype(int))

    # detect whether the category changes or not
    return _category_transitions(state_rows)

def learn_automata(transitions,
                   transitions_path='preprocessed_data/tank_transitions.csv',
                   automaton_path='preprocessed_data/automaton.json'):
    """Learn a Markov-chain automaton with Alergia and persist it as CSV and JSON.

    The first column of ``transitions`` is passed to ``run_Alergia`` as a
    single observation sequence (``automaton_type='mc'``, ``eps=0.001``).
    Every (state, successor, probability) triple of the learned model is then
    written to ``transitions_path`` as a table and to ``automaton_path`` as a
    nested mapping ``{previous: {current: probability}}``.

    Args:
        transitions: DataFrame whose first column holds the category sequence.
        transitions_path: Destination of the CSV table (columns ``Previous``,
            ``Current``, ``Probability``; the row index is written as well).
        automaton_path: Destination of the JSON file.

    Returns:
        dict: the nested mapping that was written to ``automaton_path``. Keys
        are the state outputs converted via ``str(int(...))``. If several
        learned states share an output, later entries overwrite earlier ones
        for the same (previous, current) pair.
    """
    # learning an automata based on Alergia library
    sequence = transitions[transitions.columns[0]].to_list()
    model = run_Alergia(data=[sequence], automaton_type='mc', eps=0.001, print_info=True)

    # Flatten the learned model: one record per outgoing edge of every state.
    # Each edge is indexed as (successor_state, probability).
    records = [
        {'Previous': state.output, 'Current': edge[0].output, 'Probability': edge[1]}
        for state in model.states
        for edge in state.transitions
    ]

    # Make sure the target folders exist before writing.
    for path in (transitions_path, automaton_path):
        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)

    # Explicit columns keep the CSV header stable even when no edge was learned.
    transitions_df = pd.DataFrame(records, columns=['Previous', 'Current', 'Probability'])
    transitions_df.to_csv(transitions_path)

    automaton = {}
    for record in records:
        prev = str(int(record['Previous']))
        curr = str(int(record['Current']))
        # float() keeps the JSON number format identical to the DataFrame round-trip.
        automaton.setdefault(prev, {})[curr] = float(record['Probability'])

    with open(automaton_path, 'w') as json_file:
        json.dump(automaton, json_file, indent=4)

    return automaton

def rule_learning(min_support=0.0002, dict_path='tank_dict_states_char', rule_path='tank_rule'):
    """Run ``check_normal`` with the tank settings to learn the diagnosis rules.

    This is a thin wrapper: it forwards its three arguments positionally to
    ``check_normal`` and discards the three values that call returns, so it is
    presumably run for ``check_normal``'s side effects — TODO confirm.

    Args:
        min_support: Support factor for association rule mining (wording taken
            from the original inline comment).
        dict_path: First of the two save paths for the learned rules (per the
            original inline comment); how it is used is decided by
            ``check_normal``.
        rule_path: Second save path, likewise handed to ``check_normal``.

    Returns:
        None.

    NOTE(review): the original body also defined ``min_threshold = 0.0005``
    and ``threshold_likelihood = -50`` but never passed them anywhere; they
    were removed because they had no effect on the call.
    """
    # Unpacking keeps the original expectation that exactly three values
    # come back from check_normal.
    _anomaly_df, _likelihood, _threshold_res = check_normal(min_support, dict_path, rule_path)



In [None]:
# Learn the diagnosis rules from the CatVAE discretization
# (rule_learning delegates the actual work to check_normal).
rule_learning()

In [None]:
# Compute the category transitions that the CatVAE produces on the normal data.
# NOTE: compute_transitions accepts idx but does not use it in its body.
transitions = compute_transitions(idx=1)


In [None]:
# Learn a Markov-chain automaton from the transitions and save it
# (transition table as CSV, nested probabilities as JSON).
learn_automata(transitions)