In [None]:
import json
import pathlib
import pandas as pd
import hvplot.pandas

In [None]:
# Directory the kernel is running in; the input paths below are built relative to it.
HERE = pathlib.Path.cwd()
# Sanity check: fail early if the working directory is not a usable directory.
assert HERE.is_dir()

In [None]:
# Both path exports live in the same example directory; only the file name differs.
f1, f2 = (
    HERE.joinpath('pacrat-backend', 'algo', 'examples', 'small', file_name)
    for file_name in ('paths1.json', 'paths2.json')
)
# Fail here, rather than at load time, if either export is missing.
assert f1.is_file()
assert f2.is_file()

In [None]:
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default (which is not UTF-8 everywhere).
with open(f1, mode='r', encoding='utf-8') as fh:
    j1 = json.load(fh)
with open(f2, mode='r', encoding='utf-8') as fh:
    j2 = json.load(fh)

In [None]:
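# Debugging aid (disabled): print the first record in j1 whose critical
# detection point has a type other than the string 'None', then stop.
# for row in j1:
#     cdp = row['critical_detection_point']
#     if cdp['type'] != 'None':
#         print(row)
#         break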

In [None]:
def json_to_df(jsn):
    """Summarise an array of path records as one DataFrame row per path.

    Each entry of ``jsn`` is expected to look like::

        {'id': 'e0ab00c2-1359-4357-9537-5063d8c3989c',
         'scenario_id': '85e0961a-8d7d-4f82-8879-8fa786290b62',
         'critical_detection_point': {'type': 'None'},
         'interdiction_probability': 0.0,
         'steps': [{'type': 'Area', 'id': '1bd81034-cf6e-45c5-b0c8-4a8904bbc803'},
                   {'type': 'Action', 'id': '122b8388-983f-4786-a02e-a8a8110c09cf'},
                   {'type': 'Safeguard', 'id': '243ca98c-f920-4bb7-8e55-faa81c68bee3'},
                   ...]}

    Parameters
    ----------
    jsn : iterable of dict
        Parsed JSON array of path records shaped as shown above.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``ip`` (the record's
        ``interdiction_probability``), ``n_steps``, ``n_countermeasures``
        (steps of type ``'Safeguard'``), ``n_areas``, ``n_actions`` and
        ``cdp_type`` (the ``type`` of ``critical_detection_point``).
        Rows are sorted ascending by all columns, in that order, and the
        index is reset, so the result does not depend on the order of the
        records in ``jsn``.

    Raises
    ------
    AssertionError
        If a step has a type other than ``'Safeguard'``, ``'Area'`` or
        ``'Action'``; the message lists the offending types.
    KeyError
        If a record or step lacks one of the keys read here.
    """
    known_types = ('Safeguard', 'Area', 'Action')
    columns = ['ip', 'n_steps', 'n_countermeasures', 'n_areas', 'n_actions', 'cdp_type']

    records = []
    for row in jsn:
        # Extract the step types once, then count each kind from that list.
        step_types = [step['type'] for step in row['steps']]
        n_steps = len(step_types)
        n_countermeasures = step_types.count('Safeguard')
        n_areas = step_types.count('Area')
        n_actions = step_types.count('Action')
        cdp_type = row['critical_detection_point']['type']
        ip = row['interdiction_probability']
        # TODO: the original notebook had an unfinished line here that was going
        # to record the critical detection point's position (cdp_pos).
        # Every step must be one of the three known kinds; the message (built
        # only on failure) names whatever else was found.
        assert (n_countermeasures + n_areas + n_actions) == n_steps, (
            'unexpected step type(s): %r'
            % ([t for t in step_types if t not in known_types],)
        )
        records.append([ip, n_steps, n_countermeasures, n_areas, n_actions, cdp_type])

    df = pd.DataFrame(records, columns=columns)
    # Sorting by every column gives a canonical row order, independent of input order.
    return df.sort_values(columns).reset_index(drop=True)

In [None]:
# Summarise both exports with the same function so the frames share columns and sort order.
df1, df2 = (json_to_df(paths) for paths in (j1, j2))
# Displayed as ((rows, cols), (rows, cols)).
df1.shape, df2.shape

In [None]:
# Last rows of the sorted summary of the first export.
df1.tail()

In [None]:
# Last rows of the sorted summary of the second export.
df2.tail()

In [None]:
# Element-wise comparison needs identically-shaped frames; check up front so a
# differing number of paths is reported clearly instead of as a pandas error.
assert df1.shape == df2.shape, (
    'cannot compare row by row: df1 has shape %r, df2 has shape %r'
    % (df1.shape, df2.shape)
)
# 0 means the cell is identical in both frames, 1 means it differs; any non-zero
# row sum marks a differing row (we want every sum to be 0).
diff = (df1 != df2).astype(int)
diff

In [None]:
# Index labels of the rows where at least one column differs between the frames.
diff_rows = diff.loc[diff.sum(axis=1).gt(0)].index
print('num diff rows:', len(diff_rows))

In [None]:
# The first export's rows at the index labels flagged as differing.
df1.loc[diff_rows]

In [None]:
# The second export's rows at the same index labels, for side-by-side comparison.
df2.loc[diff_rows]

In [None]:
# Largest interdiction probability in each export.
df1['ip'].max(), df2['ip'].max()

In [None]:
# Number of rows in each export whose ip equals that export's maximum ip.
tuple(len(frame.loc[frame['ip'] == frame['ip'].max()]) for frame in (df1, df2))