In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pprint import pprint
import os

# Directory holding one "<subject>.log" HyLeak log per subject. The default is
# the original absolute path; set the HYLEAK_DIR environment variable to point
# the notebook at a different checkout without editing this cell.
HYLEAK_DIR = os.environ.get("HYLEAK_DIR", "/ChaoMI/HyLeak-data/")

# Per-subject pair of sizes, keyed by subject name.
# The first element is the divisor parse_hyleak_log applies to the log's "n"
# value to obtain Nx. Presumably the pair is (input domain size, output domain
# size) -- TODO confirm the meaning of the second element.
output_domain_size_dict = {
    "smartgrid-1": (3, 12),
    "prob-termination-5": (6, 10),
    "prob-termination-7": (8, 10),
    "smartgrid-2": (9, 12),
    "prob-termination-9": (10, 10),
    "prob-termination-12": (13, 20),
    "reservoir-4": (16, 4),
    "window-20": (20, 20),
    "window-24": (24, 24),
    "smartgrid-3": (27, 12),
    "window-28": (28, 28),
    "window-32": (32, 32),
    "reservoir-6": (64, 8),
    "smartgrid-4": (81, 12),
    "smartgrid-5": (243, 12),
    "reservoir-8": (256, 16),
    "random-walk-3": (500, 24),
    "random-walk-5": (500, 31),
    "random-walk-7": (500, 33),
    "random-walk-14": (500, 40),
    "reservoir-10": (1024, 32),
    "reservoir-12": (4096, 64),
}

# Estimator/method names in a fixed order; the names match the columns of the
# result tables ("HyLeak" is the column added by this notebook).
method_order = [
    "empirical",
    "ChaoFON",
    "ChaoFRN",
    "ChaoION",
    "ChaoIRN",
    "miller",
    "ChaoFOM",
    "ChaoFRM",
    "ChaoIOM",
    "ChaoIRM",
    "HyLeak",
]

# Parse HyLeak results

In [2]:
def _parse_bracketed_floats(line):
    """Return the floats of a ``<label>: [a, b, ...]`` log line (``[]`` -> [])."""
    payload = line.split(": ")[1].strip("[]")
    if not payload.strip():
        return []
    return [float(v) for v in payload.split(", ")]


def parse_hyleak_log(log_path, debug=False, subject=None, domain_sizes=None):
    """Parse a HyLeak log file into per-trial estimate and time tables.

    The log is split into groups, each starting at a line that begins with
    ``n:``. Within a group (0-based offsets):

    * line 0 -- ``n: <int>``; integer-divided by the subject's first domain
      size to obtain ``Nx``;
    * line 3 -- ``<label>: [v, v, ...]``, one estimate per trial;
    * line 5 -- ``<label>: [t, t, ...]``, one time per trial, scaled by 1e-3
      (presumably milliseconds to seconds -- TODO confirm).

    Lines before the first ``n:`` header are ignored, and an empty log yields
    two empty tables.

    Parameters
    ----------
    log_path : str
        Path of the log file to read.
    debug : bool, default False
        When true, print every line group and display both resulting tables
        (``display`` is the IPython builtin, so this needs a notebook kernel).
    subject : str, optional
        Key into ``domain_sizes``. Defaults to the log file name without its
        extension (``".../reservoir-4.log"`` -> ``"reservoir-4"``), so the
        function no longer depends on a module-level ``subject`` variable.
    domain_sizes : mapping, optional
        Maps a subject to a tuple whose first element divides ``n``. Defaults
        to the module-level ``output_domain_size_dict``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Estimate table and time table, both indexed by ``(Nx, trial)`` with a
        single ``"HyLeak"`` column.

    Raises
    ------
    KeyError
        If ``subject`` is not a key of ``domain_sizes``.
    ValueError
        If a group has fewer than the six lines the parser needs.
    """
    if subject is None:
        subject = os.path.splitext(os.path.basename(log_path))[0]
    if domain_sizes is None:
        domain_sizes = output_domain_size_dict
    if subject not in domain_sizes:
        raise KeyError(
            f"unknown subject {subject!r} for log {log_path!r}"
        )
    domain_size = domain_sizes[subject][0]

    with open(log_path) as f:
        lines = [line.strip() for line in f]

    # A new group opens at every "n:" header; everything up to the next
    # header (including blank separator lines) belongs to the open group.
    linegroups = []
    for line in lines:
        if line.startswith("n:"):
            linegroups.append([line])
        elif linegroups:
            linegroups[-1].append(line)

    hyleak_esti_data, hyleak_time_data = [], []
    for idx, linegroup in enumerate(linegroups, 1):
        if debug:
            print(f"group {idx}")
            for line in linegroup:
                print(line)
            print()

        if len(linegroup) < 6:
            raise ValueError(
                f"group {idx} in {log_path!r} has {len(linegroup)} lines; "
                "expected at least 6"
            )

        Nx = int(linegroup[0].split(" ")[1]) // domain_size
        estimates = _parse_bracketed_floats(linegroup[3])
        times = [v * 10**-3 for v in _parse_bracketed_floats(linegroup[5])]
        for trial_idx, estimate in enumerate(estimates):
            hyleak_esti_data.append([Nx, trial_idx, estimate])
            hyleak_time_data.append([Nx, trial_idx, times[trial_idx]])

    colname = "HyLeak"
    index_cols = ["Nx", "trial"]
    hyleak_esti_df = pd.DataFrame(
        hyleak_esti_data, columns=index_cols + [colname]
    ).set_index(index_cols)
    hyleak_time_df = pd.DataFrame(
        hyleak_time_data, columns=index_cols + [colname]
    ).set_index(index_cols)
    if debug:
        display(hyleak_esti_df)
        display(hyleak_time_df)
    return hyleak_esti_df, hyleak_time_df


# Parse one subject's log with debug=True so the raw log groups and the two
# resulting tables are printed for a visual check.
# `subject`, `hyleak_heu_esti_df` and `hyleak_heu_time_df` are module-level
# names that the merge cells further down read.
subject = "reservoir-4"
log_path = os.path.join(HYLEAK_DIR, f"{subject}.log")
hyleak_heu_esti_df, hyleak_heu_time_df = parse_hyleak_log(log_path, debug=True)

group 1
n: 32
--------------------
mis (uncorrected): [1.462031023, 1.405677321, 1.579434003, 1.399531023, 1.409066706, 1.625, 1.524531023, 1.395054799, 1.421179692, 1.399531023, 1.391934003, 1.573019116, 1.379868821, 1.369349704, 1.587031023, 1.431849704, 1.462031023, 1.358679692, 1.236294941, 1.494349704, 1.266934003, 1.421441639, 1.488699408, 1.534066706, 1.409066706, 1.488699408, 1.284066706, 1.546179692, 1.280639062, 1.241228908]
mis: [1.45227710130625, 1.395904938053125, 1.572107812125, 1.38849629870625, 1.39797231218125, 1.617603365, 1.5159366850625, 1.384104648078125, 1.4101149297875, 1.38854432880625, 1.3808977538625, 1.565555239046875, 1.368826268946875, 1.35703595609375, 1.5797655177625, 1.42076872859375, 1.4522039125625, 1.34637209383125, 1.22287533135, 1.48447634245, 1.2534556903, 1.4127565770375, 1.478814865, 1.52545910228125, 1.39795442159375, 1.4788595914375, 1.27055708446875, 1.53752009414375, 1.2683113371125, 1.227676796628125]
cis: [(1.4438724430683592, 1.46068175954

Unnamed: 0_level_0,Unnamed: 1_level_0,HyLeak
Nx,trial,Unnamed: 2_level_1
2,0,1.452277
2,1,1.395905
2,2,1.572108
2,3,1.388496
2,4,1.397972
...,...,...
20,25,0.757153
20,26,0.723606
20,27,0.872516
20,28,0.857306


Unnamed: 0_level_0,Unnamed: 1_level_0,HyLeak
Nx,trial,Unnamed: 2_level_1
2,0,0.088
2,1,0.079
2,2,0.076
2,3,0.078
2,4,0.078
...,...,...
20,25,0.098
20,26,0.085
20,27,0.090
20,28,0.100


# Merge with the original data
## 1. Estimates (`esti`)

In [3]:
# Load the existing estimate table of the current `subject`; the first two CSV
# columns (Nx, trial) become the row index.
existing_data_path = f"/ChaoMI/result/esti-{subject}-i-xy.csv"
existing_data_df = pd.read_csv(existing_data_path, header=0, index_col=(0, 1))
display(existing_data_df)
# Both tables must cover exactly the same (Nx, trial) keys. The message is only
# built on failure and lists the keys present on one side only.
assert set(existing_data_df.index) == set(hyleak_heu_esti_df.index), (
    f"(Nx, trial) keys differ for {subject}: "
    f"{sorted(set(existing_data_df.index) ^ set(hyleak_heu_esti_df.index))}"
)
# A set comparison cannot see duplicated keys, which would multiply rows in the
# join; validate="one_to_one" makes pandas raise MergeError in that case.
merge_df = pd.merge(
    existing_data_df,
    hyleak_heu_esti_df,
    left_index=True,
    right_index=True,
    validate="one_to_one",
)
display(merge_df)
# Save the merged table next to the original one.
output_path = f"/ChaoMI/result/esti-merged-{subject}-i-xy.csv"
merge_df.to_csv(output_path)

Unnamed: 0_level_0,Unnamed: 1_level_0,ChaoFOM,ChaoFON,ChaoFRM,ChaoFRN,ChaoIOM,ChaoION,ChaoIRM,ChaoIRN,empirical,miller,GT,N
Nx,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2,0,0.442654,0.840471,0.498260,0.897106,0.733503,1.436628,0.733503,1.436628,1.436628,0.733503,0.731314,1000000
4,0,0.130461,0.382829,0.157080,0.406067,0.674877,0.883210,0.682163,0.890497,0.976270,0.624707,0.731314,1000000
8,0,0.644056,0.757355,0.659601,0.772232,0.696836,0.841066,0.696836,0.841066,0.859914,0.684133,0.731314,1000000
20,0,0.674660,0.742724,0.674592,0.742823,0.684600,0.754912,0.684600,0.754912,0.754912,0.684600,0.731314,1000000
2,1,-0.122571,0.142194,-0.102870,0.164228,0.618760,1.321885,0.618760,1.321885,1.321885,0.618760,0.731314,1000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,28,0.707198,0.775688,0.708335,0.776623,0.723476,0.793788,0.723476,0.793788,0.793788,0.723476,0.731314,1000000
2,29,0.274371,0.630484,0.360478,0.723032,0.791225,1.494350,0.791225,1.494350,1.494350,0.791225,0.731314,1000000
4,29,0.710241,1.024912,0.761597,1.065724,0.903162,1.111495,0.906904,1.115237,1.162005,0.810442,0.731314,1000000
8,29,0.631776,0.796450,0.636376,0.800645,0.765630,0.909861,0.765630,0.909861,0.953144,0.777363,0.731314,1000000


Unnamed: 0_level_0,Unnamed: 1_level_0,ChaoFOM,ChaoFON,ChaoFRM,ChaoFRN,ChaoIOM,ChaoION,ChaoIRM,ChaoIRN,empirical,miller,GT,N,HyLeak
Nx,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,0,0.442654,0.840471,0.498260,0.897106,0.733503,1.436628,0.733503,1.436628,1.436628,0.733503,0.731314,1000000,1.452277
4,0,0.130461,0.382829,0.157080,0.406067,0.674877,0.883210,0.682163,0.890497,0.976270,0.624707,0.731314,1000000,1.111190
8,0,0.644056,0.757355,0.659601,0.772232,0.696836,0.841066,0.696836,0.841066,0.859914,0.684133,0.731314,1000000,0.900311
20,0,0.674660,0.742724,0.674592,0.742823,0.684600,0.754912,0.684600,0.754912,0.754912,0.684600,0.731314,1000000,0.817486
2,1,-0.122571,0.142194,-0.102870,0.164228,0.618760,1.321885,0.618760,1.321885,1.321885,0.618760,0.731314,1000000,1.395905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,28,0.707198,0.775688,0.708335,0.776623,0.723476,0.793788,0.723476,0.793788,0.793788,0.723476,0.731314,1000000,0.857306
2,29,0.274371,0.630484,0.360478,0.723032,0.791225,1.494350,0.791225,1.494350,1.494350,0.791225,0.731314,1000000,1.227677
4,29,0.710241,1.024912,0.761597,1.065724,0.903162,1.111495,0.906904,1.115237,1.162005,0.810442,0.731314,1000000,1.080302
8,29,0.631776,0.796450,0.636376,0.800645,0.765630,0.909861,0.765630,0.909861,0.953144,0.777363,0.731314,1000000,0.952753


## 2. time

In [4]:
# Load the existing time table of the current `subject`; the first two CSV
# columns (Nx, trial) become the row index.
existing_data_path = f"/ChaoMI/result/time-{subject}-i-xy.csv"
existing_data_df = pd.read_csv(existing_data_path, header=0, index_col=(0, 1))
display(existing_data_df)
# Both tables must cover exactly the same (Nx, trial) keys. The message is only
# built on failure and lists the keys present on one side only.
assert set(existing_data_df.index) == set(hyleak_heu_time_df.index), (
    f"(Nx, trial) keys differ for {subject}: "
    f"{sorted(set(existing_data_df.index) ^ set(hyleak_heu_time_df.index))}"
)
# A set comparison cannot see duplicated keys, which would multiply rows in the
# join; validate="one_to_one" makes pandas raise MergeError in that case.
merge_df = pd.merge(
    existing_data_df,
    hyleak_heu_time_df,
    left_index=True,
    right_index=True,
    validate="one_to_one",
)
display(merge_df)
# Save the merged table next to the original one.
output_path = f"/ChaoMI/result/time-merged-{subject}-i-xy.csv"
merge_df.to_csv(output_path)

Unnamed: 0_level_0,Unnamed: 1_level_0,ChaoFOM,ChaoFON,ChaoFRM,ChaoFRN,ChaoIOM,ChaoION,ChaoIRM,ChaoIRN,empirical,miller
Nx,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,0,1.406305,1.329906,1.477381,1.379237,0.042251,0.040504,0.038239,0.037210,0.000339,0.000262
4,0,2.403343,2.389803,2.456750,2.443950,0.885401,0.856825,0.895394,0.866920,0.000370,0.000406
8,0,1.512180,1.525469,1.509111,1.498667,0.360250,0.340099,0.363988,0.348270,0.000374,0.000408
20,0,1.158788,1.226725,1.149591,1.168080,0.039111,0.035418,0.034928,0.032974,0.000414,0.000458
2,1,2.305148,2.061579,1.619480,1.577833,0.041488,0.035899,0.044860,0.042745,0.000321,0.000357
...,...,...,...,...,...,...,...,...,...,...,...
20,28,1.103741,0.737208,0.696623,0.836232,0.047674,0.040644,0.045343,0.045982,0.000222,0.000242
2,29,0.844936,1.289019,1.104131,1.450001,0.045415,0.039179,0.045407,0.039096,0.000317,0.000351
4,29,0.965313,1.326331,0.905431,0.841857,0.394317,0.264239,0.365000,0.444883,0.000367,0.000409
8,29,1.526618,1.337779,1.128833,1.241802,0.435277,0.364888,0.538689,0.386983,0.000364,0.000397


Unnamed: 0_level_0,Unnamed: 1_level_0,ChaoFOM,ChaoFON,ChaoFRM,ChaoFRN,ChaoIOM,ChaoION,ChaoIRM,ChaoIRN,empirical,miller,HyLeak
Nx,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2,0,1.406305,1.329906,1.477381,1.379237,0.042251,0.040504,0.038239,0.037210,0.000339,0.000262,0.088
4,0,2.403343,2.389803,2.456750,2.443950,0.885401,0.856825,0.895394,0.866920,0.000370,0.000406,0.080
8,0,1.512180,1.525469,1.509111,1.498667,0.360250,0.340099,0.363988,0.348270,0.000374,0.000408,0.086
20,0,1.158788,1.226725,1.149591,1.168080,0.039111,0.035418,0.034928,0.032974,0.000414,0.000458,0.101
2,1,2.305148,2.061579,1.619480,1.577833,0.041488,0.035899,0.044860,0.042745,0.000321,0.000357,0.079
...,...,...,...,...,...,...,...,...,...,...,...,...
20,28,1.103741,0.737208,0.696623,0.836232,0.047674,0.040644,0.045343,0.045982,0.000222,0.000242,0.100
2,29,0.844936,1.289019,1.104131,1.450001,0.045415,0.039179,0.045407,0.039096,0.000317,0.000351,0.079
4,29,0.965313,1.326331,0.905431,0.841857,0.394317,0.264239,0.365000,0.444883,0.000367,0.000409,0.079
8,29,1.526618,1.337779,1.128833,1.241802,0.435277,0.364888,0.538689,0.386983,0.000364,0.000397,0.097


# Automate for all subjects

In [5]:
# Merge the HyLeak estimates and times into the existing result tables of every
# subject. Keep the loop variable named `subject`: it is a module-level name
# that code defined earlier in this notebook may read.
for subject in output_domain_size_dict:
    print(f"Processing {subject}", flush=True)
    log_path = os.path.join(HYLEAK_DIR, f"{subject}.log")
    hyleak_heu_esti_df, hyleak_heu_time_df = parse_hyleak_log(log_path)
    # The "esti" and "time" tables are merged the same way; only the file
    # prefix and the HyLeak frame differ.
    for kind, hyleak_df in (
        ("esti", hyleak_heu_esti_df),
        ("time", hyleak_heu_time_df),
    ):
        existing_data_path = f"/ChaoMI/result/{kind}-{subject}-i-xy.csv"
        existing_data_df = pd.read_csv(
            existing_data_path, header=0, index_col=(0, 1)
        )
        # Both tables must cover exactly the same (Nx, trial) keys; on failure
        # the message lists the keys present on one side only.
        assert set(existing_data_df.index) == set(hyleak_df.index), (
            f"(Nx, trial) keys differ for {kind}/{subject}: "
            f"{sorted(set(existing_data_df.index) ^ set(hyleak_df.index))}"
        )
        # validate="one_to_one" rejects duplicated keys, which the set
        # comparison above cannot detect and which would multiply rows.
        merge_df = pd.merge(
            existing_data_df,
            hyleak_df,
            left_index=True,
            right_index=True,
            validate="one_to_one",
        )
        # Save the merged table next to the original one.
        output_path = f"/ChaoMI/result/{kind}-merged-{subject}-i-xy.csv"
        merge_df.to_csv(output_path)

Processing smartgrid-1
Processing prob-termination-5
Processing prob-termination-7
Processing smartgrid-2
Processing prob-termination-9
Processing prob-termination-12
Processing reservoir-4
Processing window-20
Processing window-24
Processing smartgrid-3
Processing window-28
Processing window-32
Processing reservoir-6
Processing smartgrid-4
Processing smartgrid-5
Processing reservoir-8
Processing random-walk-3
Processing random-walk-5
Processing random-walk-7
Processing random-walk-14
Processing reservoir-10
Processing reservoir-12
