# Fig 15

This notebook creates Fig. 15, which shows the generalization to larger samples on SYNTCOMP.

We first load the evaluations of the SYNTCOMP `ht-50-eval-0`.

Then we create the plots; the labels a), b), c), d) in the headings indicate which subplot of the figure each cell produces.


In [None]:
from ml2.ltl.ltl_syn import LTLSynEvalDataset

# Load the evaluation dataset stored under the csv_logger path of the
# `ht-50-eval-0` experiment; every plot cell below reads from `ds`.
ds = LTLSynEvalDataset.load("ltl-syn/ht-50-eval-0/eval/0/1/csv_logger")

In [None]:
from typing import Optional, Tuple
import plotly.express as px
import pandas as pd
from typing import List
import plotly.graph_objects as go


def group_by(df, group_fn, column):
    """Aggregate *df* per distinct value of *column*.

    Args:
        df: The data frame to group.
        group_fn: Callable applied to each group's sub-frame. It may return a
            scalar (yielding a single result column ``0``) or a tuple
            (yielding one result column per tuple position: ``0``, ``1``, ...).
        column: Name of the column whose values define the groups.

    Returns:
        A data frame indexed by the group keys, with one row per group that
        holds the value(s) returned by ``group_fn``. Rows that contain NaN
        are dropped.
    """
    # Iterate the groupby object directly instead of re-selecting rows via
    # ``df.loc[labels]``: label-based re-selection returns wrong (duplicated or
    # foreign) rows whenever the index of ``df`` is not unique.
    aggregated = {key: group_fn(group) for key, group in df.groupby(by=column)}
    return pd.DataFrame.from_dict(aggregated, orient="index").dropna()


def min_max_scaling(series):
    """Linearly rescale *series* so its minimum maps to 0 and its maximum to 1."""
    lowest = series.min()
    highest = series.max()
    shifted = series - lowest
    return shifted / (highest - lowest)


def plot_hist(
    df: pd.DataFrame,
    group_fn,
    column: str,
    y_axis_name: str = "",
    x_axis_name: str = "",
    fraction_show=0.95,
    percentage: bool = True,
    box_features: Optional[List[Tuple[str, str]]] = None,
    generalization_bound: Optional[int] = None,
):
    """Draw a stacked bar chart of per-group aggregates and return the figure.

    The rows of ``df`` are grouped by ``column`` and reduced with ``group_fn``
    (via ``group_by``). Each entry of ``box_features`` becomes one bar trace
    whose heights are taken from the aggregate column at the same position.

    Args:
        df: Data to aggregate.
        group_fn: Callable applied to each group; its i-th return value feeds
            the i-th entry of ``box_features``.
        column: Column of ``df`` to group by; its values form the x axis.
        y_axis_name: Title of the y axis.
        x_axis_name: Title of the x axis.
        fraction_show: Fraction of the groups (counted from the first group) to
            display; the remaining trailing groups are cut off.
        percentage: If true, the y axis is fixed to [0, 1.05] and labelled in
            percent (every 20%).
        box_features: ``(trace name, colour)`` pairs, one per aggregate column.
            Defaults to ``[("satisfied", "lightgreen"), ("violated", "red")]``.
        generalization_bound: If given, the region to the right of
            ``generalization_bound + 0.5`` is shaded grey and separated by a
            vertical black line.

    Returns:
        The ``plotly.graph_objects.Figure`` (it is also displayed via ``show``).

    Raises:
        ValueError: If no group is left to plot.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if box_features is None:
        box_features = [("satisfied", "lightgreen"), ("violated", "red")]

    dataframe = group_by(df, group_fn, column).reset_index()

    # Cut off the trailing groups. The number of kept rows is computed
    # explicitly: slicing with ``[:-n]`` would return an EMPTY frame when
    # ``n`` truncates to 0 (e.g. fewer than 20 groups with fraction_show=0.95).
    n_drop = int(len(dataframe) * (1 - fraction_show))
    if n_drop > 0:
        dataframe = dataframe.iloc[: max(len(dataframe) - n_drop, 0)]
    if dataframe.empty:
        raise ValueError("plot_hist: no groups left to plot")

    # One bar trace per feature; the aggregate columns are named 0, 1, ...
    bars = [
        go.Bar(
            name=name,
            x=dataframe["index"],
            y=dataframe[position],
            marker_color=colour,
        )
        for position, (name, colour) in enumerate(box_features)
    ]

    # Choose the x tick spacing from the width of the displayed range.
    index_span = int(dataframe["index"].max().round(0)) - int(dataframe["index"].min())
    ticks_x = 1 if index_span < 25 else (2 if index_span < 50 else 5)

    fig = go.Figure()
    fig.update_layout(template="plotly_white")
    if generalization_bound is not None:
        # Bars are centred on integer x positions, so the boundary sits at +0.5.
        fig.add_vrect(
            x0=generalization_bound + 0.5,
            x1=dataframe["index"].max().round(0) + 0.5,
            fillcolor="#c9c9c9",
            line_width=0,
            layer="below",
        )
        fig.add_vline(generalization_bound + 0.5, line_color="black")
    for bar in bars:
        fig.add_trace(bar)
    fig.update_yaxes(title_text=y_axis_name, gridcolor="#e8e8e8")
    if percentage:
        # Tick marks every 10%, but only every second one carries a label.
        fig.update_yaxes(
            tickmode="array",
            tickvals=[x / 10 for x in range(0, 11)],
            ticktext=[(str(x * 10) + "%") if x % 2 == 0 else "" for x in range(0, 11)],
            range=[0, 1.05],
        )
    fig.update_xaxes(title_text=x_axis_name, dtick=ticks_x, tick0=0)
    fig.update_layout(
        font=dict(color="black"),
        barmode="stack",
        legend=dict(orientation="h", yanchor="bottom", y=0.05, xanchor="left", x=0.01),
        colorscale={"sequential": px.colors.qualitative.G10},
        height=300,
        width=900,
        margin=dict(l=0, r=10, t=10, b=0),
        bargap=0.1,
    )
    fig.show()
    return fig

In [None]:
def valid_group(group):
    """Return ``(valid fraction, invalid fraction)`` for one group.

    The fractions are computed from the ``prediction_valid`` column of
    *group*; a group without any ``True`` entry yields ``(0, 1)``.
    """
    outcome_counts = group["prediction_valid"].value_counts()
    if True not in outcome_counts.index:
        return 0, 1
    valid_fraction = outcome_counts[True] / len(group)
    return valid_fraction, 1 - valid_fraction


def valid_group_acc(group):
    """Return ``(valid fraction, valid fraction, invalid fraction)`` for one group.

    The fractions are computed from the ``prediction_valid`` column of
    *group*; a group without any ``True`` entry yields ``(0, 0, 1)``.
    """
    outcome_counts = group["prediction_valid"].value_counts()
    if True not in outcome_counts.index:
        return 0, 0, 1
    valid_fraction = outcome_counts[True] / len(group)
    return valid_fraction, valid_fraction, 1 - valid_fraction

## Fig 15 a)


In [None]:
# Fig 15 a): fraction of valid predictions (first value of valid_group) per
# value of "input_num_properties". Only the first half of the groups is shown
# (fraction_show=0.5); the region right of x = 12.5 is shaded
# (generalization_bound=12).
# NOTE(review): group_agg_smallest is defined outside this notebook —
# presumably it keeps one prediction per input; confirm.
fig = plot_hist(
    ds.group_agg_smallest(inplace=False).df,
    valid_group,
    "input_num_properties",
    fraction_show=0.5,
    box_features=[("satisfied", "#008040")],
    y_axis_name="correct",
    generalization_bound=12,
    x_axis_name="number of properties",
)

## Fig 15 c)


In [None]:
# Fig 15 c): number of rows (len of each group) per value of
# "input_num_properties". percentage=False keeps the y axis in absolute
# counts; only the first half of the groups is shown (fraction_show=0.5).
fig = plot_hist(
    ds.group_agg_smallest(inplace=False).df,
    len,
    "input_num_properties",
    percentage=False,
    fraction_show=0.5,
    box_features=[("count", "black")],
    y_axis_name="count",
    x_axis_name="number of properties",
)

## Fig 15 b)


In [None]:
# Fig 15 b): fraction of valid predictions (first value of valid_group) per
# value of "input_max_prop_length". Only the first 30% of the groups is shown
# (fraction_show=0.3); the region right of x = 30.5 is shaded
# (generalization_bound=30).
fig = plot_hist(
    ds.group_agg_smallest(inplace=False).df,
    valid_group,
    "input_max_prop_length",
    fraction_show=0.3,
    box_features=[("satisfied", "#008040")],
    y_axis_name="correct",
    generalization_bound=30,
    x_axis_name="property length",
)

## Fig 15 d)


In [None]:
# Fig 15 d): number of rows (len of each group) per value of
# "input_max_prop_length". percentage=False keeps the y axis in absolute
# counts; only the first 30% of the groups is shown (fraction_show=0.3).
fig = plot_hist(
    ds.group_agg_smallest(inplace=False).df,
    len,
    "input_max_prop_length",
    percentage=False,
    fraction_show=0.3,
    box_features=[("count", "black")],
    y_axis_name="count",
    x_axis_name="property length",
)