In [None]:
import numpy as np
import json
import pandas as pd
from os import listdir

### Read data

In [None]:
def read_data(result_number):
    """Load all JSON results of one run into a DataFrame and save them as CSV.

    Every ``*.json`` file in ``../results/<result_number>`` is parsed. Its
    nested ``"data"`` object is flattened into the top-level record (on a key
    clash the nested value wins). The following columns are then derived:

    * ``batched_approximation_ratio``:
      ``derandomized_batched_cost / optimal_batched_cost``
    * ``alg_time``: ``derandomized_batch_time + unbatched_time``
    * ``number_of_breakpoints``: ``len(interval_breakpoints)``

    Every column whose name contains ``"time"`` (``alg_time`` included) is
    divided by 1e9 -- presumably nanoseconds to seconds; confirm against the
    code that produces the JSON files.

    Side effects: writes ``results.csv`` into the run directory and prints
    the resulting column index.

    Args:
        result_number: Name of the sub-directory of ``../results`` to read.

    Returns:
        pandas.DataFrame with one row per JSON file, in file-name order.
    """

    DATA_DIR = rf"../results/{result_number}"

    result_list = []

    # Sorted so that the row order (and therefore results.csv) does not depend
    # on the arbitrary order in which the OS lists the directory.
    for result in sorted(listdir(DATA_DIR)):
        if result.endswith(".json"):
            with open(f"{DATA_DIR}/{result}", encoding="utf-8") as f:
                data = json.load(f)
            instance = data.pop("data")
            result_list.append({**data, **instance})

    result_df = pd.DataFrame(result_list)
    result_df["batched_approximation_ratio"] = result_df["derandomized_batched_cost"] / result_df["optimal_batched_cost"]
    result_df["alg_time"] = result_df["derandomized_batch_time"] + result_df["unbatched_time"]

    # Replace the columns instead of writing through .loc: an in-place .loc
    # write of floats into integer columns is the deprecated "incompatible
    # dtype" setitem and makes the resulting dtype depend on the values.
    time_columns = result_df.columns[result_df.columns.str.contains("time")]
    result_df[time_columns] = result_df[time_columns] / 1e9

    result_df['number_of_breakpoints'] = result_df['interval_breakpoints'].apply(len)
    result_df.to_csv(f"{DATA_DIR}/results.csv", index=False)
    print(result_df.columns)
    return result_df
# Name of the run directory under ../results that this notebook analyses.
result_number = 1_718_317_124_655_246_000
result_df = read_data(result_number=result_number)

In [None]:
def label_problem(result_df):
    """Add a ``problem`` column classifying every row, modifying the frame in place.

    Labels are assigned in this order, and each rule asserts that the rows it
    is about to label are still unlabelled (i.e. the rules do not overlap):

    1. ``unit_cost`` is true                              -> ``"unit_cost"``
    2. ``is_series`` is true                              -> ``"series"``
    3. not unit cost, ``k`` present and not a series      -> ``"k-of-n"``
    4. ``k`` is missing                                   -> ``"SSC"``

    A final assertion checks that no row was left without a label.

    Args:
        result_df: DataFrame with boolean ``unit_cost`` and ``is_series``
            columns and a ``k`` column that may contain missing values.

    Returns:
        None. ``result_df`` gains (or has overwritten) the ``problem`` column.

    Raises:
        AssertionError: if two rules match the same row or a row matches none.
    """

    def _claim(rows, label):
        # The selected rows must not have been labelled by an earlier rule.
        assert result_df.loc[rows, "problem"].isna().all()
        result_df.loc[rows, "problem"] = label

    result_df["problem"] = None

    _claim(result_df["unit_cost"], "unit_cost")
    _claim(result_df["is_series"], "series")
    _claim(
        (~result_df["unit_cost"]) & (result_df["k"].notna()) & (~result_df["is_series"]),
        "k-of-n",
    )
    _claim(result_df["k"].isna(), "SSC")

    assert result_df["problem"].notna().all()
# Adds the "problem" column to result_df in place; label_problem returns nothing.
label_problem(result_df)

In [None]:
# Shared grouping on the "problem" and "n" columns, reused by later cells.
groupview = result_df.groupby(by=["problem", "n"])

In [None]:
# Summary statistics of optimal_time for each value of n, across all problems.
(
    result_df
    .groupby(by="n")["optimal_time"]
    .describe()
)

In [None]:
# Summary statistics of optimal_time for each (problem, n) group of groupview.
groupview["optimal_time"].describe()

In [None]:
# Summary statistics of batched_approximation_ratio for each (problem, n) group.
groupview["batched_approximation_ratio"].describe()

In [None]:
# Restrict to the rows whose batch_cost equals n, then summarise the
# approximation ratio of that subset per (problem, n).
high_batch_cost = result_df.loc[result_df['batch_cost'].eq(result_df['n'])]
(
    high_batch_cost
    .groupby(by=['problem', 'n'])['batched_approximation_ratio']
    .describe()
)

In [None]:
# The ten rows with the largest batched_approximation_ratio.
(
    result_df[['n', 'interval_breakpoints', 'batched_approximation_ratio']]
    .sort_values(by="batched_approximation_ratio", ascending=False)
    .head(10)
)

In [None]:
# Summary statistics of number_of_batches per (problem, number_of_breakpoints, n).
(
    result_df
    .groupby(by=['problem', 'number_of_breakpoints', 'n'])['number_of_batches']
    .describe()
)

In [None]:
# Mean and max approximation ratio of the "series" rows, one row per n and one
# column group per problem_type, exported as a LaTeX table.
summary = (
    result_df.loc[result_df['problem'].eq('series')]
    .groupby(by=["problem_type", "n"])[["batched_approximation_ratio"]]
    .agg(["mean", "max"])
    # Only the ratio column is selected above, so the "alg_time" entry of this
    # mapping has no effect here.
    .rename(columns={"batched_approximation_ratio": "Approximation Ratio", "alg_time": "Time (ms)"})
    # Disabled: LaTeX-friendly row labels.
    # .rename(index={"unit_cost": "Unit Cost", "series": "Series", "k-of-n": "\\kofn", "SSC": "\\ssc"})
    .rename_axis(index=["Problem Type", "n"])
    # Move "Problem Type" from the row index into the columns ...
    .unstack(level=0)
    # ... swap the two innermost column levels so it sits above mean/max ...
    .swaplevel(axis="columns")
    # ... and order the columns by problem type only, keeping mean before max.
    .sort_index(axis="columns", level="Problem Type", sort_remaining=False)
)
styler = summary.style.format(precision=3)

styler.to_latex(
    buf="summary.tex",
    label="tab:series",
    caption="Approximation ratio for series systems",
    hrules=True,
    position_float="centering",
)

In [None]:
# Mean and max approximation ratio of the "k-of-n" rows: after the transpose
# the statistics are the rows and the values of n are the columns.
summary2 = (
    result_df.loc[result_df['problem'].eq('k-of-n')]
    .groupby(by=["n"])[["batched_approximation_ratio"]]
    .agg(["mean", "max"])
    # Only the ratio column is selected above, so the "alg_time" entry of this
    # mapping has no effect here.
    .rename(columns={"batched_approximation_ratio": "Approximation Ratio", "alg_time": "Time (ms)"})
    .T
    # Drop the outer "Approximation Ratio" level, leaving mean/max as the index.
    .droplevel(level=0)
)
print(summary2)
styler2 = summary2.style.format(precision=3)
styler2.to_latex(
    buf="summary_kn.tex",
    label="tab:k-of-n",
    caption="Approximation ratio for \\kofn",
)

In [None]:
# Mean, max and row count of the approximation ratio of the "SSC" rows: after
# the transpose the statistics are the rows and the values of n are the columns.
summary3 = (
    result_df.loc[result_df['problem'].eq('SSC')]
    .groupby(by=["n"])[["batched_approximation_ratio"]]
    .agg(["mean", "max", "count"])
    # Only the ratio column is selected above, so the "alg_time" entry of this
    # mapping has no effect here.
    .rename(columns={"batched_approximation_ratio": "Approximation Ratio", "alg_time": "Time (ms)"})
    .T
    # Drop the outer "Approximation Ratio" level, leaving mean/max/count as the index.
    .droplevel(level=0)
)
print(summary3)
styler3 = summary3.style.format(precision=3)
styler3.to_latex(
    buf="summary_ssc.tex",
    label="tab:ssc",
    caption="Approximation ratio for \\ssc",
    hrules=True,
)


In [None]:
# Average each statistic over all values of n (the columns of both tables).
# For summary3 this also averages the "count" row.
print(summary2.mean(axis="columns"))
print(summary3.mean(axis="columns"))

### Analyze SSC

In [None]:
# Independent copy of the SSC rows, so that columns can be added to `ssc`
# without writing into (or warning about) a slice of result_df.
ssc = result_df[result_df["problem"] == "SSC"].copy()

In [None]:
# `ssc` was obtained by filtering result_df, so assigning a column to it
# directly triggers SettingWithCopyWarning. `assign` returns a new frame that
# carries the extra column instead.
ssc = ssc.assign(num_breakpoints=ssc["interval_breakpoints"].apply(len))

In [None]:
# The original cell selected the column "" (empty name), which raises KeyError.
# NOTE(review): batched_approximation_ratio is assumed to be the intended
# metric, since it is the one summarised for SSC elsewhere -- confirm.
ssc.groupby(["n", "num_breakpoints"])["batched_approximation_ratio"].describe()

### Test floor and ceiling of n/4, n/2 and 3n/4


In [None]:
# Print floor(n/4), floor(n/2) and ceil(3n/4) side by side for n = 5, ..., 15.
for n in range(5, 16):
    print(
        np.floor(n / 4),
        np.floor(n / 2),
        np.ceil(3 * n / 4),
    )