# Heat map tool

## compute and save heatmaps

In [16]:
import seaborn as sn
import pandas as pd
import numpy as np
from tqdm import tqdm
import yaml

def print_hm(data, index, columns, vmin, vmax, title, selector=None, labels=("hidden size", "num. layers")):
    """Draw ``data`` as an annotated heat map and save it to ``heatmap/{title}.png``.

    Args:
        data: When ``selector`` is ``None``, a 2-D nested sequence addressed as
            ``data[row][column]``. Otherwise a 3-D nested sequence addressed as
            ``data[column][row][metric]``; the metric at position ``selector``
            is extracted and the first two axes are swapped so rows come first.
        index: Row labels (y axis), one per row of the plotted grid.
        columns: Column labels (x axis), one per column of the plotted grid.
        vmin: Lower bound of the colour scale.
        vmax: Upper bound of the colour scale.
        title: Plot title; also used as the output file name stem.
        selector: Position of the metric to plot inside each innermost entry,
            or ``None`` when ``data`` is already a 2-D grid.
        labels: Two-item sequence ``(xlabel, ylabel)``.

    Raises:
        IndexError: If ``selector`` is given and ``data`` is empty, ragged, or
            has no entry at position ``selector``.
    """
    import os

    if selector is not None:
        # Build only the requested metric plane: grid[row][col] = data[col][row][selector].
        grid = [
            [per_column[row][selector] for per_column in data]
            for row in range(len(data[0]))
        ]
    else:
        grid = data

    df = pd.DataFrame(grid, index=index, columns=columns)
    ax = sn.heatmap(df, annot=True, fmt=".4f", cmap="YlGnBu", vmin=vmin, vmax=vmax, cbar=False)
    ax.set_title(title)
    # NOTE: the figure keeps matplotlib's default size. Assigning a ``figsize``
    # attribute on the Axes has no effect, so no such assignment is made here.
    ax.set(xlabel=labels[0], ylabel=labels[1])

    # savefig does not create missing directories.
    os.makedirs("heatmap", exist_ok=True)
    ax.figure.savefig(f"heatmap/{title}.png", dpi=300)
    # Clear the figure so the next call starts from an empty canvas.
    ax.figure.clf()

In [14]:
# Load every experiment result set from the YAML file.
with open("heatmap_data.yaml", "r", encoding="utf-8") as f:
    data = yaml.safe_load(f)

# Axis labels of the heat maps. Wider sweeps used previously:
#   columns: ["16", "32", "64", "128", "256", "512", "1024"]
#   index:   ["1", "2", "4", "8", "16"]
#   models:  ["graph_GCN", "graph_gat", "graph_sage", "graph_gin"]
columns = ["256", "512", "1024"]  # hidden_size
index = ["1", "2", "4"]  # n_layer
models = ["graph_gat"]
selector = 1  # 1: acc
heads = [2, 4, 8]
x_type = "4ll"
raw = True

# Top-level YAML key that holds the results for each supported x_type.
x_type_keys = {
    "c+x": "data_cx",
    "c": "data_c",
    "ll": "data_ll",
    "ll+c": "data_llc",
    "4ll": "data_4ll",
    "4ll+c": "data_4llc",
}
if x_type not in x_type_keys:
    # Fail here with a clear message instead of carrying ``None`` forward and
    # crashing later when the data is iterated.
    raise ValueError(
        f"Unknown x_type {x_type!r}; expected one of {sorted(x_type_keys)}"
    )
data = data[x_type_keys[x_type]]

# Collect the selected metric from every innermost entry of every result grid,
# then print summary statistics as: min mean median max.
all_values = [
    entry[selector]
    for key in data
    for group in data[key]
    for entry in group
]
# NOTE: ``min`` and ``max`` shadow the builtins; the names are kept because the
# plotting calls in this notebook pass them as the colour-scale bounds.
min, max = np.min(all_values), np.max(all_values)
mean, median = np.mean(all_values), np.median(all_values)
print(min, mean, median, max)
# Override the measured range with fixed colour-scale bounds.
min, max = 0.83807, 0.89221

# Best value per (n_layer, hidden_size) cell across all matching result sets.
def get_max(model, head=None):
    """Return a ``len(index)`` x ``len(columns)`` grid of per-cell maxima.

    Every key of the module-level ``data`` that contains ``model`` (and, when
    ``head`` is given, also contains ``str(head)``) contributes the value
    ``data[key][c][i][selector]`` to cell ``[i][c]``. Matching is plain
    substring containment on the key.

    Cells whose lookup raises ``IndexError`` are reported on stdout and skipped;
    cells that end up with no values are reported and set to ``0``.
    """
    n_rows, n_cols = len(index), len(columns)
    cells = [[[] for _ in range(n_cols)] for _ in range(n_rows)]

    # Gather candidate values from every matching result set.
    for name in data:
        if model not in name:
            continue
        if head is not None and str(head) not in name:
            continue
        for row in range(n_rows):
            for col in range(n_cols):
                try:
                    cells[row][col].append(data[name][col][row][selector])
                except IndexError:
                    print(f"Error: {model} {head} {index[row]} {columns[col]}")

    # Collapse each cell's candidates to their maximum.
    for row, row_label in enumerate(index):
        for col, col_label in enumerate(columns):
            try:
                cells[row][col] = np.max(cells[row][col])
            except ValueError:
                # np.max rejects an empty list: no result set covered this cell.
                cells[row][col] = 0
                print(f"Error: {model} {head} {row_label} {col_label}")
    return cells

if raw:
    # One heat map per result set, plotting the selected metric directly.
    for test in tqdm(data):
        print_hm(data[test], index, columns, min, max, f"{x_type}_{test}", selector)
else:
    # One heat map per model (and per head count for graph_gat), plotting the
    # per-cell maximum over all matching result sets.
    for model in tqdm(models):
        head_options = heads if model == "graph_gat" else [None]
        for h in head_options:
            title = f"{x_type}_{model}" if h is None else f"{x_type}_{model}_{h}"
            data_split = get_max(model, h)
            print_hm(data_split, index, columns, min, max, title)

0.82747 0.8683833333333333 0.86859 0.88624


100%|██████████| 12/12 [00:04<00:00,  2.64it/s]


<Figure size 640x480 with 0 Axes>

Notes (each line lists the printed statistics in the order: min, mean, median, max):


c : 0.03262 0.7221019200000001 0.72215 0.85057
c+x : 0.00483 0.5954636 0.7085 0.84653
ll :  0.00483 0.7336465066666666 0.75733 0.88059
ll+c : 0.0 0.7418553333333333 0.76517 0.87952

4ll gat: 0.82747 0.8683833333333333 0.86859 0.88624
4ll sage: 0.84739 0.8723008333333335 0.87224 0.89194
4ll+c gat: 0.75527 0.8671287962962965 0.867685 0.88871
4ll+c sage: 0.84143 0.8723305555555556 0.87232 0.89221

`compute_stats.py`

In [22]:
# Distance comparison: one row per model, one column per distance value.
distance = [30, 40, 45, 50, 60]
models = ["a", "b", "c"]
# Fixed colour-scale bounds (these names shadow the builtins).
min, max = 0.83807, 0.89221

data_1 = [0.88627, 0.89060, 0.89221, 0.89100, 0.89150]
data_2 = [0.88917, 0.88982, 0.89194, 0.89122, 0.89120]
data_3 = [0.87921, 0.88512, 0.88871, 0.88498, 0.87585]

print_hm(
    data=[data_1, data_2, data_3],
    index=models,
    columns=distance,
    vmin=min,
    vmax=max,
    title="distance comparison heat map",
    labels=["distance", "models"],
)

<Figure size 640x480 with 0 Axes>

In [23]:
# Input linear layers: one row per hidden size, one column per layer count.
layers = [0, 1, 2, 3]
hidden = [540, 1024]
# Fixed colour-scale bounds (these names shadow the builtins).
min, max = 0.83807, 0.89221

data_1 = [0.89221, 0.88151, 0.88378, 0.88208]
data_2 = [0.89221, 0.88151, 0.88035, 0.88269]

print_hm(
    data=[data_1, data_2],
    index=hidden,
    columns=layers,
    vmin=min,
    vmax=max,
    title="Input linear layers comparison heat map",
    labels=["layers", "hidden size"],
)

<Figure size 640x480 with 0 Axes>

In [20]:
# Output linear layers: one row per hidden size, one column per layer count.
layers = [0, 1, 2, 3]
hidden = [7, 16]
# Fixed colour-scale bounds (these names shadow the builtins).
min, max = 0.83807, 0.89221

data_1 = [0.89221, 0.88806, 0.88160, 0.88309]
data_2 = [0.89221, 0.88806, 0.88535, 0.88370]

print_hm(
    data=[data_1, data_2],
    index=hidden,
    columns=layers,
    vmin=min,
    vmax=max,
    title="Output linear layers comparison heat map",
    labels=["layers", "hidden size"],
)

<Figure size 640x480 with 0 Axes>