In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scripts.datasets.constant import FLARE22_LABEL_ENUM
%matplotlib inline

# Input files, given relative to this notebook's working directory.
AREA_TOP_DATA_PATH = "../all_area_data-T.json"
AREA_SIDE_DATA_PATH = "../all_area_data-W.json"

POINT_TOP_DATA_PATH = "../all_point_data-T.json"

# Read the point data, transpose it, and replace every column label that equals
# a FLARE22_LABEL_ENUM value with the lower-cased name of that enum member.
point_df = pd.read_json(POINT_TOP_DATA_PATH).T.rename(
    columns={label.value: label.name.lower() for label in FLARE22_LABEL_ENUM},
)


In [None]:
# Every cell of point_df is indexed positionally: element 0 goes to start_point
# and element 1 goes to end_point.
# NOTE(review): DataFrame.applymap is deprecated from pandas 2.1 in favour of
# DataFrame.map — switch once the minimum pandas version is confirmed.
start_point = point_df.applymap(lambda pair: pair[0])
end_point = point_df.applymap(lambda pair: pair[1])
display(start_point, end_point)

In [None]:
# Summary statistics (DataFrame.describe) of the start values, then of the end
# values, rendered one after the other.
display(start_point.describe())
display(end_point.describe())

In [None]:
# Element-wise difference between the end and start values (end minus start).
dist_point = end_point.sub(start_point)

In [None]:
# Summary statistics of dist_point; as the last expression of the cell, the
# resulting table is rendered as the cell output.
dist_point.describe()

In [None]:
# Load the area data stored at AREA_TOP_DATA_PATH, transpose it, and relabel the
# columns from FLARE22_LABEL_ENUM values to lower-cased member names.
area_df = (
    pd.read_json(AREA_TOP_DATA_PATH)
    .T
    .rename(columns={label.value: label.name.lower() for label in FLARE22_LABEL_ENUM})
)

# np.max reduces every cell to its largest value before summarising.
max_area_projection_top = area_df.applymap(np.max)
display(max_area_projection_top.describe())


In [None]:
# Load the area data stored at AREA_SIDE_DATA_PATH, transpose it, and relabel the
# columns from FLARE22_LABEL_ENUM values to lower-cased member names.
# Note that this rebinds area_df, which previously held the top-projection data.
area_df = (
    pd.read_json(AREA_SIDE_DATA_PATH)
    .T
    .rename(columns={label.value: label.name.lower() for label in FLARE22_LABEL_ENUM})
)

# np.max reduces every cell to its largest value before summarising.
max_area_projection_side = area_df.applymap(np.max)
display(max_area_projection_side.describe())


In [None]:
# Element-wise ratio of the side maxima to the top maxima, summarised with
# describe() and rendered as the cell output.
# NOTE(review): a zero maximum in the top frame would produce inf/NaN ratios
# here — confirm whether such entries can occur in the data.
(max_area_projection_side / max_area_projection_top).describe()

In [None]:
import itertools


# Re-read the AREA_TOP_DATA_PATH file: area_df was last bound to the side data,
# so this cell loads the top data again before binning it.
area_df = (
    pd.read_json(AREA_TOP_DATA_PATH)
    .T
    .rename(columns={label.value: label.name.lower() for label in FLARE22_LABEL_ENUM})
)

def batched(iterable, n):
    """Yield consecutive tuples of up to ``n`` items taken from ``iterable``.

    Every tuple has exactly ``n`` items except possibly the last one, which
    holds whatever is left over, e.g. ``batched('ABCDEFG', 3)`` gives
    ``ABC DEF G``.

    Raises:
        ValueError: if ``n`` is smaller than one. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    source = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(source, n))
        if not chunk:
            # An empty slice means the source is exhausted.
            return
        yield chunk

def binning(data, reduce, n_bin=10):
    """Split ``data`` into exactly ``n_bin`` contiguous bins and reduce each one.

    The previous chunk size of ``len(data) // n_bin + 1`` did not guarantee
    ``n_bin`` bins (20 items gave 7 bins, 90 items gave 9) and could leave a
    tiny trailing bin. Bin edges are now computed as ``i * len(data) // n_bin``,
    so the result always has ``n_bin`` entries and bin sizes differ by at most
    one item.

    Args:
        data: finite iterable of values, consumed in order.
        reduce: callable applied to each bin; every bin is passed as a tuple.
        n_bin: number of bins to produce (must be at least one).

    Returns:
        A list of ``n_bin`` values, one ``reduce`` result per bin.

    Raises:
        ValueError: if ``n_bin`` is smaller than one.

    Note:
        When ``data`` has fewer than ``n_bin`` items some bins are empty, and
        ``reduce`` is then called with an empty tuple.
    """
    if n_bin < 1:
        raise ValueError('n_bin must be at least one')
    items = tuple(data)
    total = len(items)
    # Integer edges spread any remainder evenly across the bins instead of
    # pushing it all into the last one.
    edges = [index * total // n_bin for index in range(n_bin + 1)]
    return [reduce(items[lo:hi]) for lo, hi in zip(edges, edges[1:])]

def bin_exist(data, n_bin=10):
    """Flag, bin by bin, whether ``data`` holds any value greater than zero.

    Used to check whether a certain organ exists at each bin. ``data`` is
    split by ``binning`` with the given ``n_bin``, and each bin is reduced to
    a single boolean.
    """
    def has_positive(values):
        # True as soon as one entry of the bin is strictly above zero.
        return (np.array(values) > 0.0).any()

    return binning(data, has_positive, n_bin=n_bin)

# Replace every cell of area_df with its 10-bin existence flags; as the last
# expression of the cell, the resulting frame is rendered as the output.
area_df.applymap(lambda areas: bin_exist(areas, n_bin=10))
