In [1]:
import streamlit as st
import pickle as pkl
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from denoisers.ConditionalUnetDenoiser import ConditionalUnetDenoiser
from denoisers.ConditionalUnetMatrixDenoiser import ConditionalUnetMatrixDenoiser
from utils.graph_utils import get_process_model_reachability_graph_transition_matrix, get_process_model_petri_net_transition_matrix
from utils.pm_utils import discover_dk_process, remove_duplicates_dataset, pad_to_multiple_of_n
from utils.Config import Config
import plotly.express as px
import plotly.graph_objects as go
from dataset.dataset import SaladsDataset
from ddpm.ddpm_multinomial import Diffusion
import os
import json
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

In [2]:
def load_experiment_config(target_dir):
    """Read ``cfg.json`` from a run directory and build a ``Config`` from it.

    Parameters
    ----------
    target_dir : str
        Directory expected to contain ``cfg.json``.

    Returns
    -------
    Config or None
        ``Config(**json_contents)`` when the file exists; otherwise a Streamlit
        warning is emitted and ``None`` is returned, so callers must check for it.
    """
    config_path = os.path.join(target_dir, "cfg.json")
    # Guard clause: bail out early when the run directory has no configuration.
    if not os.path.exists(config_path):
        st.warning("Configuration file not found.")
        return None
    with open(config_path, "r") as cfg_file:
        raw_cfg = json.load(cfg_file)
    return Config(**raw_cfg)
def load_experiment_data_and_model(target_dir, cfg):
    """Rebuild the data split, process model, diffuser and denoiser of a finished run.

    Parameters
    ----------
    target_dir : str
        Run directory. ``best.ckpt`` is read from it, and ``final_results.json``
        is read when present.
    cfg : Config
        Experiment configuration. Fields read here: ``data_path``, ``train_percent``,
        ``seed``, ``num_timesteps``, ``device``, ``enable_matrix``, ``num_classes``.
        It is also passed through to ``discover_dk_process``.

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset, dk_process_model, dk_init_marking,
        dk_final_marking, rg_transition_matrix, diffuser, denoiser, final_res)``.
        ``final_res`` is ``None`` when ``final_results.json`` does not exist.
    """
    # NOTE: pickle.load can execute arbitrary code; only point cfg.data_path at trusted files.
    with open(cfg.data_path, "rb") as f:
        base_dataset = pkl.load(f)
    dataset = SaladsDataset(base_dataset['target'], base_dataset['stochastic'])
    train_dataset, test_dataset = train_test_split(dataset, train_size=cfg.train_percent, shuffle=True,
                                                   random_state=cfg.seed)
    # The process model is discovered from the training split only.
    dk_process_model, dk_init_marking, dk_final_marking = discover_dk_process(train_dataset, cfg,
                                                                              preprocess=remove_duplicates_dataset)
    diffuser = Diffusion(noise_steps=cfg.num_timesteps, device=cfg.device)
    if cfg.enable_matrix:
        rg_nx, rg_transition_matrix = get_process_model_petri_net_transition_matrix(dk_process_model, dk_init_marking, dk_final_marking)
        # Add a leading axis, cast to float and pad before sizing the denoiser from the last axis.
        rg_transition_matrix = torch.tensor(rg_transition_matrix, device=cfg.device).unsqueeze(0).float()
        rg_transition_matrix = pad_to_multiple_of_n(rg_transition_matrix)
        denoiser = ConditionalUnetMatrixDenoiser(in_ch=cfg.num_classes, out_ch=cfg.num_classes,
                                                 max_input_dim=dataset.sequence_length,
                                                 transition_dim=rg_transition_matrix.shape[-1],
                                                 device=cfg.device).to(cfg.device).float()
    else:
        # Without matrix conditioning a random (num_classes, 2, 2) tensor stands in for the matrix.
        rg_transition_matrix = torch.randn((cfg.num_classes, 2, 2)).to(cfg.device)
        denoiser = ConditionalUnetDenoiser(in_ch=cfg.num_classes, out_ch=cfg.num_classes,
                                           max_input_dim=dataset.sequence_length,
                                           device=cfg.device).to(cfg.device).float()
    ckpt_path = os.path.join(target_dir, "best.ckpt")
    denoiser.load_state_dict(torch.load(ckpt_path, map_location=cfg.device)['model_state'])
    final_res_path = os.path.join(target_dir, "final_results.json")
    # Default to None so the return below never touches an unbound name when the file is missing.
    final_res = None
    if os.path.exists(final_res_path):
        with open(final_res_path, "r") as f:
            final_res = json.load(f)
    else:
        st.warning("Final results not found.")

    return (train_dataset, test_dataset, dk_process_model, dk_init_marking, dk_final_marking, rg_transition_matrix,
            diffuser, denoiser, final_res)

In [3]:
# Load a finished run (presumably the variant trained with the process model, since a sibling
# run directory carries the "_without_process" suffix -- confirm).
# NOTE(review): absolute, machine-specific path; consider a configurable runs directory.
target_dir = r"D:\Projects\trace-denoise\final_runs\50_salads_unified_gamma_0_5"
cfg = load_experiment_config(target_dir)
if cfg is None:
    # load_experiment_config returns None when cfg.json is missing; fail with a clear message
    # instead of an AttributeError on the next line.
    raise FileNotFoundError(f"cfg.json not found in {target_dir}")
# Prefer the first GPU, but keep the notebook runnable on CPU-only machines.
cfg.device = "cuda:0" if torch.cuda.is_available() else "cpu"
(train_dataset, test_dataset, dk_process_model, dk_init_marking, dk_final_marking,
 rg_transition_matrix, diffuser, denoiser, final_res) = load_experiment_data_and_model(target_dir, cfg)

In [4]:
# Read the two pickled corpora used in the rest of the notebook.
# NOTE: pickle.load can execute arbitrary code; only use trusted files.
salads_pickle_path, gtea_pickle_path = "../data/pickles/50_salads_unified.pkl", "../data/pickles/gtea_unified.pkl"
with open(salads_pickle_path, "rb") as f:
    salads_data = pkl.load(f)
with open(gtea_pickle_path, "rb") as f:
    gtea_data = pkl.load(f)

In [5]:
# Wrap each raw dict's 'target' and 'stochastic' entries in a SaladsDataset (same class for both corpora).
salads_dataset, gtea_dataset = (
    SaladsDataset(raw['target'], raw['stochastic']) for raw in (salads_data, gtea_data)
)

In [6]:
# Fixed 75/25 shuffled split (seed 42) and unshuffled loaders with batch size 2 for both corpora.
split_kwargs = dict(train_size=0.75, shuffle=True, random_state=42)
loader_kwargs = dict(batch_size=2, shuffle=False, num_workers=2)
salads_train, salads_test = train_test_split(salads_dataset, **split_kwargs)
gtea_train, gtea_test = train_test_split(gtea_dataset, **split_kwargs)
salads_tr_loader = DataLoader(salads_train, **loader_kwargs)
salads_ts_loader = DataLoader(salads_test, **loader_kwargs)
gtea_tr_loader = DataLoader(gtea_train, **loader_kwargs)
gtea_ts_loader = DataLoader(gtea_test, **loader_kwargs)

In [7]:
# Token ids that later cells filter out of the ground-truth sequences before scoring.
# NOTE(review): presumably the padding class index of each corpus -- confirm against the data.
SALADS_PAD, GTEA_PAD = 19, 11

In [8]:
# Baseline: take the argmax token of each test pair and drop positions whose first-element token is SALADS_PAD.
# NOTE(review): assumes each item is a pair of (seq_len, num_classes) tensors -- confirm.
x_list, y_list = [], []
for x_0, y in salads_test:
    x_tokens = torch.argmax(x_0, dim=1)
    y_tokens = torch.argmax(y, dim=1)
    keep = x_tokens != SALADS_PAD  # one mask, derived from x_0, applied to both sequences
    x_list.append(np.array(x_tokens[keep]))
    y_list.append(np.array(y_tokens[keep]))

In [9]:
def average_accuracy_dataset(gt, dataset):
    """Return the mean per-pair accuracy between two aligned collections.

    Each pair ``(x, y)`` from ``zip(gt, dataset)`` is reduced with ``argmax`` over
    axis 1 and scored with ``accuracy_score`` (``x`` as truth, ``y`` as prediction).
    """
    per_pair = []
    for x, y in zip(gt, dataset):
        per_pair.append(accuracy_score(np.argmax(x, axis=1), np.argmax(y, axis=1)))
    return np.mean(per_pair)

In [10]:
# Per-sequence metrics averaged over sequences (x_list = truth, y_list = prediction).
macro = dict(average='macro', zero_division=0)
acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
print("Argmax measures: accuracy: {:.4f}, recall: {:.4f}, precision: {:.4f}".format(acc, rec, pre))

Argmax measures: accuracy: 0.7746, recall: 0.7318, precision: 0.7448


In [11]:
# Run the diffusion sampler over the salads test loader, keeping samples and ground truth per batch.
denoiser.eval()
denoised, gt = [], []
for x, y in tqdm(salads_ts_loader):
    # Swap the last two axes, then move to the configured device as float.
    x = x.permute(0, 2, 1).to(cfg.device).float()
    y = y.permute(0, 2, 1).to(cfg.device).float()
    sample_out = diffuser.sample_with_matrix(
        denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
        rg_transition_matrix.shape[-1], rg_transition_matrix, x, y, cfg.predict_on)
    # Only x_hat is used below; the other returned values are unpacked but not read in this cell.
    x_hat, matrix_hat, loss, seq_loss, mat_loss = sample_out
    denoised.append(x_hat)
    gt.append(x)

  0%|          | 0/5 [00:00<?, ?it/s]

In [12]:
# Stack the per-batch results along the batch axis.
# NOTE: rebinds `denoised` and `gt` from lists to tensors, so the sampling cell must be re-run before this one.
denoised, gt = torch.cat(denoised, dim=0), torch.cat(gt, dim=0)

In [13]:
# Turn each (ground truth, sample) pair into token sequences; positions whose truth token is SALADS_PAD are dropped.
x_list, y_list = [], []
for x_0, y in zip(gt, denoised):
    x_tokens = torch.argmax(x_0, dim=0)
    class_probs = torch.softmax(y, dim=0)
    y_tokens = class_probs.transpose(0, 1).argmax(dim=1)
    keep = x_tokens != SALADS_PAD
    x_list.append(np.array(x_tokens[keep].cpu()))
    y_list.append(np.array(y_tokens[keep].cpu()))

In [14]:
# Per-sequence metrics for the diffusion samples, averaged over sequences.
macro = dict(average='macro', zero_division=0)
acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
print("Diffusion with pm measures: accuracy: {:.4f}, recall: {:.4f}, precision: {:.4f}".format(acc, rec, pre))

Diffusion measures: accuracy: 0.9344, recall: 0.8865, precision: 0.8914


In [15]:
# Load the "_without_process" run and evaluate it on the salads test loader end to end.
# NOTE(review): absolute, machine-specific path; consider a configurable runs directory.
target_dir = r"D:\Projects\trace-denoise\final_runs\50_salads_unified_gamma_0_5_without_process"
cfg = load_experiment_config(target_dir)
if cfg is None:
    # load_experiment_config returns None when cfg.json is missing; fail with a clear message.
    raise FileNotFoundError(f"cfg.json not found in {target_dir}")
# Prefer the first GPU, but keep the notebook runnable on CPU-only machines.
cfg.device = "cuda:0" if torch.cuda.is_available() else "cpu"
(train_dataset, test_dataset, dk_process_model, dk_init_marking, dk_final_marking,
 rg_transition_matrix, diffuser, denoiser, final_res) = load_experiment_data_and_model(target_dir, cfg)

# Sample with the freshly loaded model.
denoiser.eval()
denoised = []
gt = []
for x, y in tqdm(salads_ts_loader):
    # Swap the last two axes, then move to the configured device as float.
    x = x.permute(0, 2, 1).to(cfg.device).float()
    y = y.permute(0, 2, 1).to(cfg.device).float()
    x_hat, matrix_hat, loss, seq_loss, mat_loss = \
        diffuser.sample_with_matrix(denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
                                    rg_transition_matrix.shape[-1], rg_transition_matrix, x, y,
                                    cfg.predict_on)
    denoised.append(x_hat)
    gt.append(x)
denoised = torch.cat(denoised, dim=0)
gt = torch.cat(gt, dim=0)

# Token sequences with positions whose truth token is SALADS_PAD removed.
x_list = []
y_list = []
for x_0, y in zip(gt, denoised):
    x_tokens = torch.argmax(x_0, dim=0)
    y_tokens = torch.argmax(torch.softmax(y, dim=0).transpose(0, 1), dim=1)
    keep = x_tokens != SALADS_PAD
    x_list.append(np.array(x_tokens[keep].cpu()))
    y_list.append(np.array(y_tokens[keep].cpu()))

# Per-sequence metrics averaged over sequences.
acc = np.mean([accuracy_score(x, y) for x, y in zip(x_list, y_list)])
rec = np.mean([recall_score(x, y, average='macro', zero_division=0) for x, y in zip(x_list, y_list)])
pre = np.mean([precision_score(x, y, average='macro', zero_division=0) for x, y in zip(x_list, y_list)])
print("Diffusion without pm measures: accuracy: {:.4f}, recall: {:.4f}, precision: {:.4f}".format(acc, rec, pre))

  0%|          | 0/5 [00:00<?, ?it/s]

Diffusion without pm measures: accuracy: 0.9352, recall: 0.8835, precision: 0.8876


In [15]:
# Synthetic dataset files whose name contains "det".
# os.listdir returns names in arbitrary order; sort them so the positional pairing with the
# noise-level axis in the plotting cell is deterministic.
# NOTE(review): assumes lexicographic file-name order matches ascending noise level -- confirm.
datasets = sorted(ds for ds in os.listdir("../data/synthetic") if "det" in ds)

In [16]:
# Evaluate the denoiser currently loaded in the kernel on the test split of every file in `datasets`.
# NOTE(review): relies on whichever `denoiser`, `diffuser`, `cfg` and `rg_transition_matrix` are live
# from earlier cells, and masks with SALADS_PAD -- confirm both are intended for these datasets.
result_list = []
denoiser.eval()
for ds in tqdm(datasets):
    ds_path = os.path.join("../data/synthetic", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)
    loader_kwargs = dict(batch_size=2, shuffle=False, num_workers=2)
    train_loader = DataLoader(dataset_train, **loader_kwargs)
    test_loader = DataLoader(dataset_test, **loader_kwargs)

    # Sample batch by batch, keeping samples and ground truth.
    denoised, gt = [], []
    for x, y in tqdm(test_loader):
        x = x.permute(0, 2, 1).to(cfg.device).float()
        y = y.permute(0, 2, 1).to(cfg.device).float()
        sample_out = diffuser.sample_with_matrix(
            denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
            rg_transition_matrix.shape[-1], rg_transition_matrix, x, y, cfg.predict_on)
        x_hat, matrix_hat, loss, seq_loss, mat_loss = sample_out
        denoised.append(x_hat)
        gt.append(x)
    denoised = torch.cat(denoised, dim=0)
    gt = torch.cat(gt, dim=0)

    # Token sequences with positions whose truth token is SALADS_PAD removed.
    x_list, y_list = [], []
    for x_0, y in zip(gt, denoised):
        x_tokens = torch.argmax(x_0, dim=0)
        y_tokens = torch.softmax(y, dim=0).transpose(0, 1).argmax(dim=1)
        keep = x_tokens != SALADS_PAD
        x_list.append(np.array(x_tokens[keep].cpu()))
        y_list.append(np.array(y_tokens[keep].cpu()))

    # Per-sequence metrics averaged over sequences.
    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    result_list.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

  0%|          | 0/17 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:29<01:57, 29.46s/it][A
 40%|████      | 2/5 [00:56<01:24, 28.05s/it][A
 60%|██████    | 3/5 [01:22<00:53, 26.92s/it][A
 80%|████████  | 4/5 [01:48<00:26, 26.59s/it][A
100%|██████████| 5/5 [02:14<00:00, 26.88s/it][A
  6%|▌         | 1/17 [02:14<35:53, 134.58s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:29<01:56, 29.02s/it][A
 40%|████      | 2/5 [00:54<01:20, 26.87s/it][A
 60%|██████    | 3/5 [01:19<00:52, 26.15s/it][A
 80%|████████  | 4/5 [01:44<00:25, 25.80s/it][A
100%|██████████| 5/5 [02:10<00:00, 26.08s/it][A
 12%|█▏        | 2/17 [04:25<33:04, 132.27s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:27<01:51, 27.93s/it][A
 40%|████      | 2/5 [00:53<01:19, 26.62s/it][A
 60%|██████    | 3/5 [01:19<00:52, 26.08s/it][A
 80%|████████  | 4/5 [01:44<00:25, 25.95s/it][A
100%|██████████| 5/5 [02:12<00:00, 26.47s/it][A
 18%|█▊

In [20]:
# Argmax baseline on the same files and split: score the second element of each test pair against the first, no model.
results_argmax = []
for ds in datasets:
    ds_path = os.path.join("../data/synthetic", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)

    x_list, y_list = [], []
    for x_0, y in dataset_test:
        x_tokens = torch.argmax(x_0, dim=1)
        y_tokens = torch.argmax(y, dim=1)
        keep = x_tokens != SALADS_PAD  # drop positions whose truth token is SALADS_PAD
        x_list.append(np.array(x_tokens[keep]))
        y_list.append(np.array(y_tokens[keep]))

    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    results_argmax.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

In [22]:
# Accuracy of diffusion vs. argmax baseline across the evaluated datasets.
accs = [d["accuracy"] for d in result_list]
accs_argmax = [d["accuracy"] for d in results_argmax]
# Build the x axis from the number of results (start 0.67, step 0.01) rather than a float-step
# np.arange, whose element count is not reliable for non-integer steps.
# NOTE(review): assumes results are ordered by ascending noise level -- confirm against file names.
n_points = max(len(accs), len(accs_argmax))
noise_levels = np.round(0.67 + 0.01 * np.arange(n_points), 2)
fig = go.Figure()
fig.add_trace(go.Scatter(x=noise_levels[:len(accs)], y=accs, mode='lines+markers', name='Diffusion',
                         line=dict(color='blue')))
fig.add_trace(go.Scatter(x=noise_levels[:len(accs_argmax)], y=accs_argmax, mode='lines+markers', name='Argmax',
                         line=dict(color='red')))
fig.update_layout(title='Accuracy vs Noise Level',
                  xaxis_title='Noise Level',
                  yaxis_title='Accuracy')
fig.show()

In [23]:
# Synthetic dataset files whose name contains "sto".
# os.listdir returns names in arbitrary order; sort them so the positional pairing with the
# noise-level axis in the plotting cell is deterministic.
# NOTE(review): assumes lexicographic file-name order matches ascending noise level -- confirm.
datasets = sorted(ds for ds in os.listdir("../data/synthetic") if "sto" in ds)

In [24]:
# Evaluate the denoiser currently loaded in the kernel on the test split of every file in `datasets`.
# NOTE(review): relies on whichever `denoiser`, `diffuser`, `cfg` and `rg_transition_matrix` are live
# from earlier cells, and masks with SALADS_PAD -- confirm both are intended for these datasets.
result_list = []
denoiser.eval()
for ds in tqdm(datasets):
    ds_path = os.path.join("../data/synthetic", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)
    loader_kwargs = dict(batch_size=2, shuffle=False, num_workers=2)
    train_loader = DataLoader(dataset_train, **loader_kwargs)
    test_loader = DataLoader(dataset_test, **loader_kwargs)

    # Sample batch by batch, keeping samples and ground truth.
    denoised, gt = [], []
    for x, y in tqdm(test_loader):
        x = x.permute(0, 2, 1).to(cfg.device).float()
        y = y.permute(0, 2, 1).to(cfg.device).float()
        sample_out = diffuser.sample_with_matrix(
            denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
            rg_transition_matrix.shape[-1], rg_transition_matrix, x, y, cfg.predict_on)
        x_hat, matrix_hat, loss, seq_loss, mat_loss = sample_out
        denoised.append(x_hat)
        gt.append(x)
    denoised = torch.cat(denoised, dim=0)
    gt = torch.cat(gt, dim=0)

    # Token sequences with positions whose truth token is SALADS_PAD removed.
    x_list, y_list = [], []
    for x_0, y in zip(gt, denoised):
        x_tokens = torch.argmax(x_0, dim=0)
        y_tokens = torch.softmax(y, dim=0).transpose(0, 1).argmax(dim=1)
        keep = x_tokens != SALADS_PAD
        x_list.append(np.array(x_tokens[keep].cpu()))
        y_list.append(np.array(y_tokens[keep].cpu()))

    # Per-sequence metrics averaged over sequences.
    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    result_list.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

  0%|          | 0/34 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:29<01:58, 29.68s/it][A
 40%|████      | 2/5 [00:55<01:22, 27.67s/it][A
 60%|██████    | 3/5 [01:22<00:53, 26.94s/it][A
 80%|████████  | 4/5 [01:48<00:26, 26.58s/it][A
100%|██████████| 5/5 [02:14<00:00, 26.88s/it][A
  3%|▎         | 1/34 [02:14<1:14:00, 134.57s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:28<01:53, 28.49s/it][A
 40%|████      | 2/5 [00:54<01:21, 27.08s/it][A
 60%|██████    | 3/5 [01:20<00:53, 26.62s/it][A
 80%|████████  | 4/5 [01:46<00:26, 26.41s/it][A
100%|██████████| 5/5 [02:13<00:00, 26.66s/it][A
  6%|▌         | 2/34 [04:28<1:11:26, 133.94s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:28<01:54, 28.68s/it][A
 40%|████      | 2/5 [00:54<01:21, 27.22s/it][A
 60%|██████    | 3/5 [01:21<00:53, 26.73s/it][A
 80%|████████  | 4/5 [01:47<00:26, 26.48s/it][A
100%|██████████| 5/5 [02:13<00:00, 26.74s/it][A
  9

In [25]:
# Argmax baseline on the same files and split: score the second element of each test pair against the first, no model.
results_argmax = []
for ds in datasets:
    ds_path = os.path.join("../data/synthetic", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)

    x_list, y_list = [], []
    for x_0, y in dataset_test:
        x_tokens = torch.argmax(x_0, dim=1)
        y_tokens = torch.argmax(y, dim=1)
        keep = x_tokens != SALADS_PAD  # drop positions whose truth token is SALADS_PAD
        x_list.append(np.array(x_tokens[keep]))
        y_list.append(np.array(y_tokens[keep]))

    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    results_argmax.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

In [26]:
# Accuracy of diffusion vs. argmax baseline across the evaluated datasets.
accs = [d["accuracy"] for d in result_list]
accs_argmax = [d["accuracy"] for d in results_argmax]
# Build the x axis from the number of results (start 0.5, step 0.01) rather than a float-step
# np.arange, whose element count is not reliable for non-integer steps.
# NOTE(review): assumes results are ordered by ascending noise level -- confirm against file names.
n_points = max(len(accs), len(accs_argmax))
noise_levels = np.round(0.5 + 0.01 * np.arange(n_points), 2)
fig = go.Figure()
fig.add_trace(go.Scatter(x=noise_levels[:len(accs)], y=accs, mode='lines+markers', name='Diffusion',
                         line=dict(color='blue')))
fig.add_trace(go.Scatter(x=noise_levels[:len(accs_argmax)], y=accs_argmax, mode='lines+markers', name='Argmax',
                         line=dict(color='red')))
fig.update_layout(title='Accuracy vs Noise Level',
                  xaxis_title='Noise Level',
                  yaxis_title='Accuracy')
fig.show()

In [28]:
# Pickles whose name contains "alpha" but neither "sto" nor "005".
# os.listdir returns names in arbitrary order; sort them so the positional pairing with the
# x axis in the plotting cell is deterministic.
# NOTE(review): assumes lexicographic file-name order matches the ascending x-axis values -- confirm.
datasets = sorted(ds for ds in os.listdir("../data/pickles")
                  if "alpha" in ds and "sto" not in ds and "005" not in ds)

In [30]:
# Evaluate the denoiser currently loaded in the kernel on the test split of every file in `datasets`.
# NOTE(review): relies on whichever `denoiser`, `diffuser`, `cfg` and `rg_transition_matrix` are live
# from earlier cells, and masks with SALADS_PAD -- confirm both are intended for these datasets.
result_list = []
denoiser.eval()
for ds in tqdm(datasets):
    ds_path = os.path.join("../data/pickles", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)
    loader_kwargs = dict(batch_size=2, shuffle=False, num_workers=2)
    train_loader = DataLoader(dataset_train, **loader_kwargs)
    test_loader = DataLoader(dataset_test, **loader_kwargs)

    # Sample batch by batch, keeping samples and ground truth.
    denoised, gt = [], []
    for x, y in tqdm(test_loader):
        x = x.permute(0, 2, 1).to(cfg.device).float()
        y = y.permute(0, 2, 1).to(cfg.device).float()
        sample_out = diffuser.sample_with_matrix(
            denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
            rg_transition_matrix.shape[-1], rg_transition_matrix, x, y, cfg.predict_on)
        x_hat, matrix_hat, loss, seq_loss, mat_loss = sample_out
        denoised.append(x_hat)
        gt.append(x)
    denoised = torch.cat(denoised, dim=0)
    gt = torch.cat(gt, dim=0)

    # Token sequences with positions whose truth token is SALADS_PAD removed.
    x_list, y_list = [], []
    for x_0, y in zip(gt, denoised):
        x_tokens = torch.argmax(x_0, dim=0)
        y_tokens = torch.softmax(y, dim=0).transpose(0, 1).argmax(dim=1)
        keep = x_tokens != SALADS_PAD
        x_list.append(np.array(x_tokens[keep].cpu()))
        y_list.append(np.array(y_tokens[keep].cpu()))

    # Per-sequence metrics averaged over sequences.
    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    result_list.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

  0%|          | 0/7 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:30<02:03, 30.98s/it][A
 40%|████      | 2/5 [00:57<01:25, 28.54s/it][A
 60%|██████    | 3/5 [01:24<00:55, 27.74s/it][A
 80%|████████  | 4/5 [01:51<00:27, 27.33s/it][A
100%|██████████| 5/5 [02:18<00:00, 27.66s/it][A
 14%|█▍        | 1/7 [02:18<13:51, 138.52s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:29<01:57, 29.25s/it][A
 40%|████      | 2/5 [00:55<01:23, 27.76s/it][A
 60%|██████    | 3/5 [01:24<00:56, 28.12s/it][A
 80%|████████  | 4/5 [01:52<00:27, 27.95s/it][A
100%|██████████| 5/5 [02:20<00:00, 28.00s/it][A
 29%|██▊       | 2/7 [04:38<11:37, 139.53s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:29<01:58, 29.59s/it][A
 40%|████      | 2/5 [00:56<01:24, 28.15s/it][A
 60%|██████    | 3/5 [01:24<00:55, 27.78s/it][A
 80%|████████  | 4/5 [01:51<00:27, 27.77s/it][A
100%|██████████| 5/5 [02:19<00:00, 27.81s/it][A
 43%|████▎

In [31]:
# Argmax baseline on the same files and split: score the second element of each test pair against the first, no model.
results_argmax = []
for ds in datasets:
    ds_path = os.path.join("../data/pickles", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)

    x_list, y_list = [], []
    for x_0, y in dataset_test:
        x_tokens = torch.argmax(x_0, dim=1)
        y_tokens = torch.argmax(y, dim=1)
        keep = x_tokens != SALADS_PAD  # drop positions whose truth token is SALADS_PAD
        x_list.append(np.array(x_tokens[keep]))
        y_list.append(np.array(y_tokens[keep]))

    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    results_argmax.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

In [32]:
# Accuracy of diffusion vs. argmax baseline across the evaluated datasets.
accs = [d["accuracy"] for d in result_list]
accs_argmax = [d["accuracy"] for d in results_argmax]
# Build the x axis from the number of results (start 0.1, step 0.1) rather than a float-step
# np.arange: np.arange(0.1, 0.8, 0.1) can return one more element than intended because of rounding.
# NOTE(review): assumes results are ordered by ascending x value -- confirm against file names.
n_points = max(len(accs), len(accs_argmax))
noise_levels = np.round(0.1 + 0.1 * np.arange(n_points), 1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=noise_levels[:len(accs)], y=accs, mode='lines+markers', name='Diffusion',
                         line=dict(color='blue')))
fig.add_trace(go.Scatter(x=noise_levels[:len(accs_argmax)], y=accs_argmax, mode='lines+markers', name='Argmax',
                         line=dict(color='red')))
fig.update_layout(title='Accuracy vs Noise Level',
                  xaxis_title='Noise Level',
                  yaxis_title='Accuracy')
fig.show()

In [33]:
# Pickles whose name contains both "alpha" and "sto" but not "005".
# os.listdir returns names in arbitrary order; sort them so the positional pairing with the
# x axis in the plotting cell is deterministic.
# NOTE(review): assumes lexicographic file-name order matches the ascending x-axis values -- confirm.
datasets = sorted(ds for ds in os.listdir("../data/pickles")
                  if "alpha" in ds and "sto" in ds and "005" not in ds)

In [37]:
# (Re)load the "_without_process" run so the evaluation below uses a known model.
# NOTE(review): absolute, machine-specific path; consider a configurable runs directory.
target_dir = r"D:\Projects\trace-denoise\final_runs\50_salads_unified_gamma_0_5_without_process"
cfg = load_experiment_config(target_dir)
if cfg is None:
    # load_experiment_config returns None when cfg.json is missing; fail with a clear message
    # instead of an AttributeError on the next line.
    raise FileNotFoundError(f"cfg.json not found in {target_dir}")
# Prefer the first GPU, but keep the notebook runnable on CPU-only machines.
cfg.device = "cuda:0" if torch.cuda.is_available() else "cpu"
(train_dataset, test_dataset, dk_process_model, dk_init_marking, dk_final_marking,
 rg_transition_matrix, diffuser, denoiser, final_res) = load_experiment_data_and_model(target_dir, cfg)

In [38]:
# Evaluate the denoiser currently loaded in the kernel on the test split of every file in `datasets`.
# NOTE(review): relies on whichever `denoiser`, `diffuser`, `cfg` and `rg_transition_matrix` are live
# from earlier cells, and masks with SALADS_PAD -- confirm both are intended for these datasets.
result_list = []
denoiser.eval()
for ds in tqdm(datasets):
    ds_path = os.path.join("../data/pickles", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)
    loader_kwargs = dict(batch_size=2, shuffle=False, num_workers=2)
    train_loader = DataLoader(dataset_train, **loader_kwargs)
    test_loader = DataLoader(dataset_test, **loader_kwargs)

    # Sample batch by batch, keeping samples and ground truth.
    denoised, gt = [], []
    for x, y in tqdm(test_loader):
        x = x.permute(0, 2, 1).to(cfg.device).float()
        y = y.permute(0, 2, 1).to(cfg.device).float()
        sample_out = diffuser.sample_with_matrix(
            denoiser, y.shape[0], cfg.num_classes, denoiser.max_input_dim,
            rg_transition_matrix.shape[-1], rg_transition_matrix, x, y, cfg.predict_on)
        x_hat, matrix_hat, loss, seq_loss, mat_loss = sample_out
        denoised.append(x_hat)
        gt.append(x)
    denoised = torch.cat(denoised, dim=0)
    gt = torch.cat(gt, dim=0)

    # Token sequences with positions whose truth token is SALADS_PAD removed.
    x_list, y_list = [], []
    for x_0, y in zip(gt, denoised):
        x_tokens = torch.argmax(x_0, dim=0)
        y_tokens = torch.softmax(y, dim=0).transpose(0, 1).argmax(dim=1)
        keep = x_tokens != SALADS_PAD
        x_list.append(np.array(x_tokens[keep].cpu()))
        y_list.append(np.array(y_tokens[keep].cpu()))

    # Per-sequence metrics averaged over sequences.
    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    result_list.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

  0%|          | 0/7 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:19<01:18, 19.73s/it][A
 40%|████      | 2/5 [00:36<00:54, 18.12s/it][A
 60%|██████    | 3/5 [00:53<00:35, 17.58s/it][A
 80%|████████  | 4/5 [01:10<00:17, 17.40s/it][A
100%|██████████| 5/5 [01:28<00:00, 17.63s/it][A
 14%|█▍        | 1/7 [01:28<08:50, 88.37s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:19<01:17, 19.45s/it][A
 40%|████      | 2/5 [00:36<00:53, 17.89s/it][A
 60%|██████    | 3/5 [00:53<00:34, 17.43s/it][A
 80%|████████  | 4/5 [01:10<00:17, 17.21s/it][A
100%|██████████| 5/5 [01:27<00:00, 17.44s/it][A
 29%|██▊       | 2/7 [02:55<07:18, 87.79s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
 20%|██        | 1/5 [00:19<01:17, 19.35s/it][A
 40%|████      | 2/5 [00:36<00:54, 18.33s/it][A
 60%|██████    | 3/5 [00:54<00:35, 17.83s/it][A
 80%|████████  | 4/5 [01:13<00:18, 18.34s/it][A
100%|██████████| 5/5 [01:32<00:00, 18.55s/it][A
 43%|████▎  

In [42]:
# Argmax baseline on the same files and split: score the second element of each test pair against the first, no model.
results_argmax = []
for ds in datasets:
    ds_path = os.path.join("../data/pickles", ds)
    with open(ds_path, "rb") as f:
        data = pkl.load(f)
    dataset = SaladsDataset(data['target'], data['stochastic'])
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.75, shuffle=True, random_state=42)

    x_list, y_list = [], []
    for x_0, y in dataset_test:
        x_tokens = torch.argmax(x_0, dim=1)
        y_tokens = torch.argmax(y, dim=1)
        keep = x_tokens != SALADS_PAD  # drop positions whose truth token is SALADS_PAD
        x_list.append(np.array(x_tokens[keep]))
        y_list.append(np.array(y_tokens[keep]))

    macro = dict(average='macro', zero_division=0)
    acc = np.mean([accuracy_score(truth, pred) for truth, pred in zip(x_list, y_list)])
    rec = np.mean([recall_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    pre = np.mean([precision_score(truth, pred, **macro) for truth, pred in zip(x_list, y_list)])
    results_argmax.append(dict(dataset=ds, accuracy=acc, recall=rec, precision=pre))

In [43]:
# Accuracy of diffusion vs. argmax baseline across the evaluated datasets.
accs = [d["accuracy"] for d in result_list]
accs_argmax = [d["accuracy"] for d in results_argmax]
# Build the x axis from the number of results (start 0.1, step 0.1) rather than a float-step
# np.arange: np.arange(0.1, 0.8, 0.1) can return one more element than intended because of rounding.
# NOTE(review): assumes results are ordered by ascending x value -- confirm against file names.
n_points = max(len(accs), len(accs_argmax))
noise_levels = np.round(0.1 + 0.1 * np.arange(n_points), 1)
fig = go.Figure()
fig.add_trace(go.Scatter(x=noise_levels[:len(accs)], y=accs, mode='lines+markers', name='Diffusion',
                         line=dict(color='blue')))
fig.add_trace(go.Scatter(x=noise_levels[:len(accs_argmax)], y=accs_argmax, mode='lines+markers', name='Argmax',
                         line=dict(color='red')))
fig.update_layout(title='Accuracy vs Noise Level',
                  xaxis_title='Noise Level',
                  yaxis_title='Accuracy')
fig.show()

In [41]:
# Export the Plotly figure currently bound to `fig` as a standalone HTML file.
# NOTE(review): read top to bottom, `fig` is built by the cell just above, which follows a reload of the
# "..._without_process" run, yet the file name says "with_pm". This cell's execution count (41) is also
# lower than that of the cells before it -- confirm which figure was actually saved under this name.
fig.write_html("lambda_scaling_with_pm.html")