In [1]:
# The star import stays first so that the explicit imports below take
# precedence over any same-named exports from GWMT.
from GWMT import *
import os

import networkx as nx
# np is used below (np.asarray / np.savetxt); import it explicitly instead of
# relying on it leaking in through the star import above.
import numpy as np
from matplotlib import pyplot as plt

import readMergeTree as rmt

# Load data
* The simplified merge tree
* The scalar field data

## Loading the merge tree and the scalar field data

In [2]:
# Names of the dataset folders to process; each one is joined onto "data" when
# the merge trees are loaded. The same list is assigned again in the loading cell.
datasets = ["20180501_juelich", "20180623_juelich", "20190512_juelich"]

In [3]:
# NOTE(review): maxima_only is not referenced by any other visible cell —
# confirm whether it is still needed.
maxima_only = True
# value_thres = 2.0

# Scalar-value threshold per time-of-day period. It is looked up with the
# period name returned by get_time_str and passed to rmt.get_trees
# (threshold=) and to volume_simplify_mt (stop_saddle_val=).
thres_dict_by_time = {
    "morning": 9,
    "afternoon": 10,
    "late-afternoon": 9
}

# Inclusive [start, end] bounds of each period; get_time_str compares them
# against int(hrtime).
# The trailing numbers are presumably the matching time-step indices — TODO confirm.
time_period = {
    "morning": [600, 900],  # 0, 36
    "afternoon": [901, 1500],  # 37, 108
    "late-afternoon": [1501, 1800],  # 109, (end not recorded)
}

def get_time_str(hrtime):
    """Return the name of the period in ``time_period`` that contains ``hrtime``.

    ``hrtime`` is converted with ``int()`` and tested against the inclusive
    [start, end] bounds of every period, in dictionary order; the first
    matching period name is returned. When no period matches, the result
    is ``None``.
    """
    as_int = int(hrtime)
    for period_name, (start, end) in time_period.items():
        if start <= as_int <= end:
            return period_name
    return None

def get_hrtime_by_filename(filename):
    """Extract the time part from a name shaped like ``<prefix>_<date>t<time>``.

    Every occurrence of ".txt" and ".npy" is removed first. The text after
    the last "_" is then split on "t" and the second piece is returned as a
    string. A ValueError is raised (by the tuple unpacking) when that text
    does not split into exactly two pieces.
    """
    stem = filename.replace(".txt", "").replace(".npy", "")
    stamp = stem.rsplit("_", 1)[-1]
    _date_part, time_part = stamp.split("t")
    return time_part

# gwmt_list = []

# mt_list = []
# root_list = []
# region_list = []
# value_list = []

# Per-dataset containers, keyed by dataset name. mt_list, root_list, time_list
# and period_list are index-aligned: entry i of each describes the same tree.
gwmt_list = dict()

mt_list = dict()
root_list = dict()

time_list = dict()
period_list = dict()


def _tree_file_sort_key(filename):
    """Sort key built from the last "_"-separated token of the file stem.

    Purely numeric tokens come first, in numeric order. Any other token
    (for example "<date>t<time>") comes after them in lexicographic order,
    so the processing order never depends on the listing order of os.walk.
    """
    token = filename.split(".")[0].split("_")[-1]
    try:
        return (0, int(token), "")
    except ValueError:
        return (1, 0, token)


datasets = ["20180501_juelich", "20180623_juelich", "20190512_juelich"]
for dataset in datasets:
    print("Working on", dataset)
    mt_list[dataset] = []
    root_list[dataset] = []

    time_list[dataset] = []
    period_list[dataset] = []

    dataset_path = os.path.join("data", dataset)
    for froot, di, files in os.walk(dataset_path):
        # Sorting the sub-directory names in place makes the walk order deterministic.
        di.sort()

        txt_files = sorted((f for f in files if f.endswith("txt")), key=_tree_file_sort_key)

        # You need to specify the root node type. Choices: ["minimum", "maximum"]
        # (Avoid specifying merge tree type to avoid confusion between split tree and join tree in different contexts)
        for file in txt_files:
            hrtime = get_hrtime_by_filename(file)
            period = get_time_str(hrtime)
            if period is None:
                # Same exception type as the plain dictionary lookup would raise,
                # but with a message that names the offending file.
                raise KeyError("No period in time_period covers time {!r} (file {!r})".format(hrtime, file))
            value_thres = thres_dict_by_time[period]

            # Join onto the directory os.walk is currently visiting, so files
            # found in sub-directories resolve to their real location.
            trees, roots = rmt.get_trees(os.path.join(froot, file), root_type="minimum", threshold=value_thres)

            # One time/period entry per tree (not per file) keeps these lists
            # aligned with mt_list even when a file yields several trees.
            time_list[dataset].extend([hrtime] * len(trees))
            period_list[dataset].extend([period] * len(trees))
            mt_list[dataset].extend(trees)
            root_list[dataset].extend(roots)

    assert (len(root_list[dataset]) == len(mt_list[dataset]))
    assert (len(time_list[dataset]) == len(mt_list[dataset]))
    assert (len(period_list[dataset]) == len(mt_list[dataset]))
    
# ================================================================== #
    
# Let's not oversimplify the merge tree, because now we need many nodes as anchor points

# Used to remove very small cloud systems from the results entirely
# (passed to volume_simplify_mt as disappear_vol_thres).
# This should be very small.
disappear_volume_threshold = 1

# This is to reduce the number of anchor points (but not remove) for cloud systems
# This can be a bit large

def get_volume_thres(time_str):
    """Return the volume threshold configured for the period ``time_str``.

    All three known periods currently share the value 5. Any other period
    name raises KeyError.
    """
    per_period_threshold = dict([
        ("morning", 5),
        ("afternoon", 5),
        ("late-afternoon", 5),
    ])
    return per_period_threshold[time_str]

# Simplified trees and their roots per dataset, index-aligned with mt_list[dataset].
simplified_mt_list = dict()
simplified_root_list = dict()

for dataset in datasets:
    print("Working on", dataset)
    n_trees = len(mt_list[dataset])
    simplified_mt_list[dataset] = [None] * n_trees
    simplified_root_list[dataset] = [None] * n_trees

    for idx, mt in enumerate(mt_list[dataset]):
        tstr = period_list[dataset][idx]
        tt = time_list[dataset][idx]
        value_thres = thres_dict_by_time[get_time_str(tt)]

        # Only the second return value of volume_simplify_mt is used here.
        _, simp_mt = volume_simplify_mt(
            mt,
            vol_thres=get_volume_thres(tstr),
            disappear_vol_thres=disappear_volume_threshold,
            vol_name="volume",
            stop_saddle_val=value_thres,
        )
        simplified_mt_list[dataset][idx] = simp_mt
        simplified_root_list[dataset][idx] = simp_mt.root

        # Sanity count: nodes of type 2 whose height lies below the threshold.
        # (Type 2 is the type the commented-out anchor-point cell selects;
        # presumably maxima — TODO confirm.)
        nn_smaller_than_thres = sum(
            1
            for node in simp_mt.nodes()
            if simp_mt.nodes[node]["height"] < value_thres and simp_mt.nodes[node]["type"] == 2
        )
        # Columns: time, node count before, node count after, sanity count.
        print(tt, mt.number_of_nodes(), simp_mt.number_of_nodes(), nn_smaller_than_thres)

Working on 20180501_juelich
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volum

Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Adding volume to the merge tree
Working 

1015 15441 2930 0
Initially removing 6521 leaves.
1020 15229 2974 0
Initially removing 6587 leaves.
1025 15340 2979 0
Initially removing 6602 leaves.
1030 15430 2987 0
Initially removing 6733 leaves.
1035 15627 2930 0
Initially removing 6713 leaves.
1040 15581 2926 0
Initially removing 6788 leaves.
1045 15661 2864 0
Initially removing 6642 leaves.
1050 15295 2831 0
Initially removing 6733 leaves.
1055 15507 2848 0
Initially removing 6609 leaves.
1100 15251 2798 0
Initially removing 6597 leaves.
1105 15135 2689 0
Initially removing 6625 leaves.
1110 15189 2686 0
Initially removing 6657 leaves.
1115 15239 2708 0
Initially removing 6576 leaves.
1120 15121 2691 0
Initially removing 6523 leaves.
1125 14999 2649 0
Initially removing 6557 leaves.
1130 14914 2504 0
Initially removing 6420 leaves.
1135 14736 2514 0
Initially removing 6441 leaves.
1140 14777 2554 0
Initially removing 6448 leaves.
1145 14744 2520 0
Initially removing 6515 leaves.
1150 14881 2512 0
Initially removing 6523 leaves.


1205 11532 1891 0
Initially removing 5114 leaves.
1210 11613 1895 0
Initially removing 5069 leaves.
1215 11595 1928 0
Initially removing 5197 leaves.
1220 11809 1901 0
Initially removing 5104 leaves.
1225 11672 1949 0
Initially removing 5198 leaves.
1230 11848 1968 0
Initially removing 5115 leaves.
1235 11677 1967 0
Initially removing 5280 leaves.
1240 12003 2003 0
Initially removing 5222 leaves.
1245 11980 2093 0
Initially removing 5288 leaves.
1250 12035 2012 0
Initially removing 5496 leaves.
1255 12423 2042 0
Initially removing 5497 leaves.
1300 12574 2091 0
Initially removing 5492 leaves.
1305 12507 2020 0
Initially removing 5611 leaves.
1310 12755 2074 0
Initially removing 5557 leaves.
1315 12606 2073 0
Initially removing 5399 leaves.
1320 12423 2146 0
Initially removing 5495 leaves.
1325 12720 2274 0
Initially removing 5683 leaves.
1330 13050 2312 0
Initially removing 5649 leaves.
1335 13083 2391 0
Initially removing 5771 leaves.
1340 13232 2300 0
Initially removing 5768 leaves.


1400 5663 2898 0
Initially removing 1507 leaves.
1405 5637 2848 0
Initially removing 1446 leaves.
1410 5549 2885 0
Initially removing 1421 leaves.
1415 5437 2865 0
Initially removing 1401 leaves.
1420 5311 2722 0
Initially removing 1430 leaves.
1425 5345 2741 0
Initially removing 1416 leaves.
1430 5244 2673 0
Initially removing 1365 leaves.
1435 5119 2627 0
Initially removing 1356 leaves.
1440 5071 2618 0
Initially removing 1344 leaves.
1445 5068 2651 0
Initially removing 1340 leaves.
1450 5009 2588 0
Initially removing 1301 leaves.
1455 4962 2618 0
Initially removing 1246 leaves.
1500 4850 2584 0
Initially removing 1188 leaves.
1505 4772 2673 0
Initially removing 1167 leaves.
1510 4698 2628 0
Initially removing 1191 leaves.
1515 4618 2526 0
Initially removing 1189 leaves.
1520 4561 2468 0
Initially removing 1145 leaves.
1525 4435 2444 0
Initially removing 1094 leaves.
1530 4325 2382 0
Initially removing 1042 leaves.
1535 4157 2348 0
Initially removing 1008 leaves.
1540 4005 2252 0
Ini

## Apply area-based intra-cloud anchor point simplification

In [4]:
# # Let's not oversimplify the merge tree, because now we need many nodes as anchor points

# # This serves for removing the very-small cloud system from the results entirely
# # This should be very small
# disappear_volume_threshold = 10

# # This is to reduce the number of anchor points (but not remove) for cloud systems
# # This can be a bit large
# volume_threshold = 30

# simplified_mt_list = [None for i in range(len(mt_list))]
# simplified_root_list = [None for i in range(len(mt_list))]

# idx = 0

# for mt, region in zip(mt_list, region_list):
#     if volume_threshold > 0:
#         _, simp_mt = volume_simplify_mt(mt, 
#                                         vol_thres=volume_threshold, 
#                                         disappear_vol_thres=disappear_volume_threshold, 
#                                         vol_name="volume", 
#                                         stop_saddle_val=value_thres)
#         simplified_mt_list[idx] = simp_mt
#         simplified_root_list[idx] = simp_mt.root
#     else:
#         simplified_mt_list[idx] = mt
#         simplified_root_list[idx] = mt.root
#     print(mt.number_of_nodes(), simplified_mt_list[idx].number_of_nodes())
#     idx += 1

## We save all the remaining critical point information as a list

- Key information per node: "x", "y", "z", "height", "type" (CriticalType)

In [6]:
# Output root; one sub-directory per dataset is created below it.
cp_info_root = "./simplified-merge-trees/"

for dataset in datasets:
    cp_info_dir = os.path.join(cp_info_root, dataset)
    os.makedirs(cp_info_dir, exist_ok=True)

    for em, mt in enumerate(simplified_mt_list[dataset]):
        file_tag = str(em).zfill(3)

        # Node table: one row per node, in the iteration order of mt.nodes().
        crit_pts = []
        for node in mt.nodes():
            attrs = mt.nodes[node]
            crit_pts.append([attrs[k] for k in ("x", "y", "z", "height", "type")])

        cp_file = os.path.join(cp_info_dir, "treeNodes_{}.txt".format(file_tag))
        with open(cp_file, "w") as outf:
            for x, y, z, height, tp in crit_pts:
                # Coordinates and type are written as ints; height is written as-is.
                row = (int(x), int(y), int(z), height, int(tp))
                outf.write(" ".join(map(str, row)) + "\n")

        # Edge table: each edge once, as (smaller id, larger id).
        # NOTE(review): the ids written here only line up with the rows of the
        # node file if node ids are 0..n-1 in iteration order — confirm.
        edge_rows = []
        for u in mt.nodes():
            for v in mt.neighbors(u):
                assert u < mt.number_of_nodes()
                if v > u:
                    edge_rows.append([u, v])
        edge_file = os.path.join(cp_info_dir, "treeEdges_{}.txt".format(file_tag))
        edge_rows = np.asarray(edge_rows, dtype=int)

        # A tree with n nodes must have exactly n - 1 edges.
        assert len(edge_rows) == len(crit_pts) - 1
        np.savetxt(edge_file, edge_rows, fmt="%d")


## We put all anchor points at a time step into a list for future use

In [None]:
# anchor_points_list = []

# for mt in simplified_mt_list:
#     anchor_pts = []
#     for node in mt.nodes():
#         if mt.nodes[node]["type"] == 2:
#             anchor_pts.append({"id":node, "x":mt.nodes[node]["x"], "y":mt.nodes[node]["y"]})
#     anchor_points_list.append(anchor_pts)

# Watershed Segmentation

In [None]:
# from skimage.segmentation import watershed

First, we visualize all the cloud areas above the superlevel set threshold

In [None]:
# # value_thres = 2.0
# time_i = 10

In [None]:
# scalar_field = value_list[time_i]
# anchor_points = anchor_points_list[time_i]

In [None]:
# binary_map = np.zeros(scalar_field.shape)
# binary_map[scalar_field >= value_thres] = 1

In [None]:
# plt.imshow(binary_map, interpolation="none")
# plt.colorbar()

In [None]:
# ## Watershed tests

# # markers are the coordinates from anchor points, but must be INT
# markers = np.asarray([[int(each["x"]), int(each["y"])] for each in anchor_points], dtype=int)
# print(markers.shape, np.max(markers[:, 0]), np.max(markers[:, 1]))
# print(scalar_field.shape)
                    
# plt.imshow(binary_map, interpolation="none")
# plt.scatter(x=markers[:, 1], 
#             y=markers[:, 0],
#             s=2)
# plt.colorbar()

In [None]:
# from scipy.ndimage import label
# markers_in_field = np.zeros(scalar_field.shape, dtype=bool)
# markers_in_field[tuple(markers.T)] = True
# markers_with_label, _ = label(markers_in_field, structure=np.asarray([[0, 0, 0],[0, 1, 0],[0, 0, 0]]))

# labels = watershed(-scalar_field, markers=markers_with_label, connectivity=2, mask=binary_map)

# plt.figure(figsize=(16, 10))
# plt.imshow(labels, interpolation="none")
# plt.colorbar()
# plt.scatter(x=markers[:, 1], 
#             y=markers[:, 0],
#             s=2,
#             c="r")

In [None]:
# plt.figure(figsize=(16, 10))
# plt.imshow(markers_with_label, interpolation="none")
# plt.colorbar()
# # plt.scatter(x=markers[:, 1], 
# #             y=markers[:, 0],
# #             s=2,
# #             c="r")

We briefly report some statistics for this segmentation.

In [None]:
# label_index, label_counts = np.unique(labels, return_counts=True)

# print(len(markers))
# print(np.sum(markers_in_field))
# print(np.max(markers_with_label))

In [None]:
# mlabel_index, mlabel_counts = np.unique(markers_with_label, return_counts=True)
# print(len(mlabel_counts))

### The histogram for the segmentation area. 

The majority of cloud segments are small in area, but there are also regions with an area of more than 6000 pixels. 
This casts doubt on whether using the area as the probability is stable, because the range of node probabilities can be very large (e.g., 6000:10).

In [None]:
# plt.hist(label_counts[1:], bins=200)
# plt.show()

### Unit-based probability distribution

We distribute a total probability of 1 over all anchor points in proportion to the area of their segmentation regions. 
Therefore, the probability assigned to a node is $\text{seg\_area} \times \frac{1}{\text{total\_cloud\_area}}$.

Here, $\frac{1}{\text{total\_cloud\_area}}$ is the unit of probability.

Notes for total cloud area: 
* time_0: 281231
* time_1: 250313
* time_2: 245292

In [None]:
# print("Total cloud area:", np.sum(label_counts[1:]))

## Pairwise Region distance

In [None]:
# time_j = time_i + 1
# print(time_i, time_j)

In [None]:
# def get_segmentation(t):
#     scalar_field = value_list[t]
#     anchor_points = anchor_points_list[t]
    
#     binary_map = np.zeros(scalar_field.shape)
#     binary_map[scalar_field >= value_thres] = 1
    
#     markers = np.asarray([[int(each["x"]), int(each["y"])] for each in anchor_points], dtype=int)
#     markers_in_field = np.zeros(scalar_field.shape, dtype=bool)
#     markers_in_field[tuple(markers.T)] = True
#     markers_with_label, _ = label(markers_in_field, structure=np.asarray([[0, 0, 0],[0, 1, 0],[0, 0, 0]]))

#     labels = watershed(-scalar_field, markers=markers_with_label, connectivity=2, mask=binary_map)
#     return labels

In [None]:
# seg_i = get_segmentation(time_i)
# seg_j = get_segmentation(time_j)

In [None]:
# dist_ij = segmentation_distance(seg_i, seg_j, max_dist=18, normalize_factor=932, max_workers=7)

In [None]:
# plt.imshow(dist_ij)

In [None]:
# ## Let's try some example filtering
# feature_i = np.random.randint(len(label_counts[1:])) + 1 # np.argmax(label_counts[1:]) + 1
# feature_i_map = np.zeros(seg_i.shape)
# feature_i_map[seg_i == feature_i] = 1

# feature_j_map = np.zeros(seg_j.shape)

# idx_i = feature_i - 1
# for idx_j in range(dist_ij.shape[1]):
#     feature_j = idx_j + 1
#     if dist_ij[idx_i, idx_j] < 932:
#         feature_j_map[seg_j == feature_j] = 10 + dist_ij[idx_i, idx_j]

# plt.figure(figsize=(18, 12))
# plt.subplot(211)
# plt.imshow(feature_i_map)

# plt.subplot(212)
# plt.imshow(feature_j_map)