# Result Report

In [13]:
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns

# Name of the dataset; used as the last path component of the output, raw
# and cleaned directories below.
dataset = "Hospital"
# Name of the method; selects the sub-directory under the output root.
method = "lstm"
# Directory whose children are iterated to find per-dataset result folders.
output_path = Path(f"../../../output/{method}") / dataset
# Directories holding the "<name>.csv" files that are diffed against each
# other (raw vs. cleaned).
raw_path = Path(f"../../../data/test/ed2_noswap/{dataset}/raw")
cleaned_path = Path(f"../../../data/test/ed2_noswap/{dataset}/cleaned")
# Highest step index offered by the summary slider and used for slider marks.
max_step = 1


def not_equal(df1, df2):
    """Return a boolean mask that is True where the two frames differ.

    Cells that are null in both frames are treated as equal, even though a
    plain ``!=`` comparison would flag them as different.
    """
    differs = df1 != df2
    both_missing = df1.isnull() & df2.isnull()
    return differs & ~both_missing


def diff_dfs(df1, df2, compare_func=not_equal):
    """Return a table of the cells in which two DataFrames differ.

    Args:
        df1: Reference frame; its differing cells are reported in the
            "Outliers" column.
        df2: Frame compared against ``df1``; its differing cells are
            reported in the "Normal values" column. It is cast to the dtypes
            of ``df1`` when the dtypes differ.
        compare_func: Callable taking ``(df1, df2)`` and returning a boolean
            DataFrame that is True where cells are considered different.

    Returns:
        ``None`` when ``df1.equals(df2)``; otherwise a DataFrame with the
        columns "Outliers" and "Normal values", one row per differing cell
        in row-major order. String values are truncated to 53 characters.

    Raises:
        AssertionError: If the column names of the two frames differ.
    """
    import warnings

    assert (df1.columns == df2.columns).all(), "DataFrame column names are different"
    if any(df1.dtypes != df2.dtypes):
        # The original bare string literal was a no-op; actually emit it.
        warnings.warn("Data Types are different, trying to convert")
        df2 = df2.astype(df1.dtypes)
    if df1.equals(df2):
        return None

    def _truncate(value):
        # Only strings can be sliced; leave NaN / numeric cells untouched.
        return value[:53] if isinstance(value, str) else value

    diff_mask = compare_func(df1, df2)
    # np.where walks the mask in row-major order, so both value arrays line up.
    difference_locations = np.where(diff_mask)
    changed_from = df1.values[difference_locations]
    changed_to = df2.values[difference_locations]
    return pd.DataFrame(
        {
            "Outliers": [_truncate(value) for value in changed_from],
            "Normal values": [_truncate(value) for value in changed_to],
        }
    )


# Dataset name -> cell-level diff between the raw and the cleaned CSV.
name2diff = dict()
# Step index -> {dataset name -> metrics}; turned into DataFrames further down.
step2summary = defaultdict(dict)
# Dataset name -> DataFrame with one metrics row per step.
name2report = defaultdict(pd.DataFrame)
# Dataset name -> list of 10 slots, indexed by step, holding the debug frames.
name2debug = defaultdict(lambda: [None] * 10)

# Walk every per-dataset result folder and collect the raw/cleaned diff, the
# per-step metrics and the per-step debug tables.
for dataset_path in output_path.iterdir():
    # The "summary" entry is not a dataset result folder.
    if dataset_path.name == "summary":
        continue
    dataset_name = dataset_path.name

    # Read everything as strings and keep empty cells as "" so the diff
    # compares the literal file contents.
    raw_df = pd.read_csv(
        raw_path / f"{dataset_name}.csv", dtype=str, keep_default_na=False
    )
    cleaned_df = pd.read_csv(
        cleaned_path / f"{dataset_name}.csv", dtype=str, keep_default_na=False
    )
    name2diff[dataset_name] = diff_dfs(raw_df, cleaned_df)

    report_rows = []
    # Step folders are named by their integer index; sort numerically so that
    # step "10" follows "9" instead of "1".
    for step_path in sorted(dataset_path.iterdir(), key=lambda p: int(p.name)):
        step = int(step_path.name)
        report_df = pd.read_csv(step_path / "report.csv")
        score_df = pd.read_csv(step_path / "debug.csv")
        score_df["result"] = score_df["from"] == score_df["to"]

        # Row of the report whose "index" column equals the string "False".
        # NOTE(review): presumably the metrics of the "False" class of a
        # classification report - confirm against the code writing report.csv.
        report_values = report_df[report_df["index"] == "False"]
        metrics = {
            key: report_values[key].item()
            for key in ("precision", "recall", "f1-score")
        }
        # Collect rows and build the frame once: DataFrame.append was removed
        # in pandas 2.0 and was quadratic anyway.
        report_rows.append(metrics)

        # Grow the slot list on demand so steps >= 10 do not raise IndexError.
        debug_slots = name2debug[dataset_name]
        if step >= len(debug_slots):
            debug_slots.extend([None] * (step + 1 - len(debug_slots)))
        debug_slots[step] = score_df

        step2summary[step][dataset_name] = {
            **metrics,
            "support": int(report_values["support"].item()),
        }

    if report_rows:
        name2report[dataset_name] = pd.DataFrame(report_rows)

# Turn each step's {dataset -> metrics} mapping into a table with one row per
# dataset plus a "mean" row, and expose it under name2debug["summary"].
for step, summary in list(step2summary.items()):
    summary_df = pd.DataFrame(summary)
    # Columns are datasets here, so the row-wise mean averages each metric
    # over all datasets.
    summary_df["mean"] = summary_df.mean(axis=1)
    summary_df = summary_df.transpose()
    # Keep the dataset name as a regular column so it survives to_dict("records").
    summary_df["name"] = summary_df.index
    step2summary[step] = summary_df

    # Grow the slot list on demand so steps >= 10 do not raise IndexError.
    summary_slots = name2debug["summary"]
    if step >= len(summary_slots):
        summary_slots.extend([None] * (step + 1 - len(summary_slots)))
    summary_slots[step] = summary_df

In [14]:
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
import dash_table as dt
import plotly.express as px
import plotly.graph_objects as go
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash

# Dash application for use inside the notebook, styled with the Bootstrap theme.
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Server object exposed by the app.
server = app.server

# Dataset selected initially: the first key of name2diff.
name = list(name2diff)[0]

# Diff table shown initially. diff_dfs returns None when the raw and cleaned
# files are identical; fall back to an empty table instead of crashing.
_initial_diff = name2diff[name]
if _initial_diff is None:
    _initial_diff = pd.DataFrame(columns=["Outliers", "Normal values"])

# Page layout, top to bottom:
#   1. Summary section: step slider, metrics table and grouped bar chart.
#   2. Dataset section: dataset dropdown, per-step metrics line chart and the
#      raw-vs-cleaned diff table.
#   3. Step section: step slider and the debug table of the selected step.
app.layout = html.Div(
    [
        html.Div(
            [
                html.H2("Summary:"),
                dcc.Slider(
                    id="sum-slider",
                    min=0,
                    max=max_step,
                    step=1,
                    value=0,
                    marks={i: str(i) for i in range(max_step + 1)},
                ),
                dt.DataTable(
                    id="sum-table",
                    columns=[{"name": i, "id": i} for i in step2summary[0].columns],
                    filter_action="native",
                    sort_action="native",
                    data=step2summary[0].to_dict("records"),
                    page_action="native",
                    page_current=0,
                    page_size=10,
                ),
                dcc.Graph(
                    id="summary",
                    figure=px.bar(
                        step2summary[0][["precision", "recall", "f1-score"]],
                        barmode="group",
                    ),
                ),
            ]
        ),
        html.Div(
            [
                html.H2("Dataset:"),
                dcc.Dropdown(
                    id="dataset-dropdown",
                    options=[{"label": i, "value": i} for i in name2diff.keys()],
                    value=name,
                ),
                html.Br(),
                dcc.Graph(id="graph", figure=px.line(name2report[name])),
                html.Br(),
                dt.DataTable(
                    id="table",
                    columns=[{"name": i, "id": i} for i in _initial_diff.columns],
                    filter_action="native",
                    sort_action="native",
                    data=_initial_diff.to_dict("records"),
                    page_action="native",
                    page_current=0,
                    page_size=10,
                ),
            ],
            style={"margin-bottom": "50px"},
        ),
        html.Div(
            [
                html.H4("Step:"),
                dcc.Slider(
                    id="slider",
                    min=0,
                    # Was hard-coded to 9 while the marks only cover
                    # 0..max_step; keep the range in line with the marks so
                    # no unlabelled step can be selected.
                    max=max_step,
                    step=1,
                    value=0,
                    marks={i: str(i) for i in range(max_step + 1)},
                ),
                html.Br(),
                dt.DataTable(
                    id="debug-table",
                    columns=[{"name": i, "id": i} for i in name2debug[name][0].columns],
                    filter_action="native",
                    sort_action="native",
                    data=name2debug[name][0].to_dict("records"),
                    page_action="native",
                    page_current=0,
                    page_size=10,
                ),
            ]
        ),
    ],
    style={"display": "block", "width": "80%", "margin-left": "50px"},
)


@app.callback(
    [Output("summary", "figure"), Output("sum-table", "data")],
    [Input("sum-slider", "value")],
)
def master_slider_change(value):
    """Refresh the summary chart and table for the step chosen on the slider.

    Args:
        value: Step index selected on the "sum-slider" component.

    Returns:
        A tuple of the grouped bar figure (precision, recall, f1-score) and
        the summary table rows as a list of record dicts.
    """
    summary_df = step2summary[value]
    metric_columns = ["precision", "recall", "f1-score"]
    figure = px.bar(summary_df[metric_columns], barmode="group")
    records = summary_df.to_dict("records")
    return figure, records


@app.callback(
    Output(component_id="table", component_property="data"),
    [Input(component_id="dataset-dropdown", component_property="value")],
)
def dropdown_change(value):
    """Return the diff-table rows for the dataset chosen in the dropdown.

    Args:
        value: Dataset name selected in "dataset-dropdown"; ``None`` when the
            dropdown has been cleared.

    Returns:
        The diff as a list of record dicts, or an empty list when no dataset
        is selected, the name is unknown, or the stored diff is ``None``
        (raw and cleaned files were identical).
    """
    diff = name2diff.get(value)
    if diff is None:
        return []
    return diff.to_dict("records")


@app.callback(
    Output(component_id="graph", component_property="figure"),
    [Input("dataset-dropdown", "value")],
)
def make_figure(value):
    """Build the line chart of the per-step metrics for the selected dataset.

    Args:
        value: Dataset name selected in "dataset-dropdown".

    Returns:
        A plotly express line figure of ``name2report[value]``.
    """
    report = name2report[value]
    return px.line(report)


@app.callback(
    Output(component_id="debug-table", component_property="data"),
    [
        Input(component_id="slider", component_property="value"),
        Input(component_id="dataset-dropdown", component_property="value"),
    ],
)
def slider_change(value, name):
    """Return the debug-table rows for the selected dataset and step.

    Args:
        value: Step index selected on the "slider" component.
        name: Dataset name selected in "dataset-dropdown"; ``None`` when the
            dropdown has been cleared.

    Returns:
        The debug frame of that step as a list of record dicts, or an empty
        list when there is no data for the selection (unknown dataset, step
        outside the stored range, or a step slot that was never filled).
    """
    # .get avoids creating an empty entry for unknown names in the defaultdict.
    steps = name2debug.get(name)
    if steps is None or value is None or not 0 <= value < len(steps):
        return []
    frame = steps[value]
    if frame is None:
        return []
    return frame.to_dict("records")

In [15]:
# Start serving the Dash app; the recorded cell output below shows the URL.
app.run_server()

Dash app running on http://127.0.0.1:8050/
