# Verify effect of Hidden Similarity Regularization on Models

In [None]:
import pandas as pd

hidden_reg_df = pd.read_csv("../results/spectralmpnn_experiment.csv")
hidden_reg_df

# Load Datasets

In [None]:
import torch
import yaml
from algo_reasoning.src.sampler import CLRSDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def load_algorithm_args(args_file):
    with open(args_file, 'r') as f:
        args = yaml.safe_load(f)

    return args

# Comparing effect of Spectral Architectures to Algorithm Length

In [None]:
algorithm_args = load_algorithm_args("../algorithm_args/default.yaml")
hidden_reg_df["max_length"] = pd.Series([0] * len(hidden_reg_df))

for alg in hidden_reg_df.algorithm:
    print("Generating sample for: ", alg)
    algorithms = [alg]
    nb_nodes = 64
    ds = CLRSDataset(algorithms, nb_nodes, 1, 1000, seed=7, algorithms_args=algorithm_args)
    obj = next(iter(ds)).to(device=device)

    hidden_reg_df.loc[hidden_reg_df.algorithm == alg, ["max_length"]] =  obj.max_length.item()

In [None]:
reg_1_effect = hidden_reg_df["specformer_f1_output"] - hidden_reg_df["mpnn_f1_output"]
reg_5_effect = hidden_reg_df["spectralmpnn_f1_output"] - hidden_reg_df["mpnn_f1_output"]

hidden_reg_df["specformer_effect"] = reg_1_effect
hidden_reg_df["spectralmpnn_effect"] = reg_5_effect

In [None]:
hidden_reg_df[["specformer_effect", "spectralmpnn_effect"]].mean()

In [None]:
_corr = hidden_reg_df[hidden_reg_df.columns.difference(['algorithm'])].corr()

_corr["max_length"]

In [None]:
hidden_reg_df

# Effect by Algorithm Type

In [None]:
type_dict = {
    "divide_and_conquer": ["find_maximum_subarray_kadane"],
    "dynamic_programming": ["matrix_chain_order", "lcs_length", "optimal_bst"],
    "geometry": ["segments_intersect", "graham_scan", "jarvis_march"],
    "graphs": ["dfs", "bfs", "topological_sort", "articulation_points", "bridges", "strongly_connected_components", "mst_kruskal", "mst_prim", "bellman_ford", "dijkstra", "dag_shortest_paths", "floyd_warshall"],
    "greedy": ["activity_selector", "task_scheduling"], 
    "searching": ["minimum", "binary_search", "quickselect"],
    "sorting": ["insertion_sort", "bubble_sort", "heapsort", "quicksort"],
    "strings": ["naive_string_matcher", "kmp_matcher"]
}

def get_algo_type(algo):
    for _type in type_dict.keys():
        print
        if algo in type_dict[_type]:
            return _type
        
hidden_reg_df["_type"] = hidden_reg_df.algorithm.apply(get_algo_type)
agg_df = hidden_reg_df[hidden_reg_df.columns.difference(['algorithm'])].groupby(['_type']).mean()
agg_df

In [None]:
agg_df[["mpnn_f1_output", "specformer_f1_output", "spectralmpnn_f1_output"]]

In [None]:
agg_df[["specformer_effect", "spectralmpnn_effect"]]

In [None]:
agg_df.corr()

# Effect by Output Type

In [None]:
from algo_reasoning.src.specs import SPECS, Stage

def get_output_type(algo):
    for k, v in SPECS[algo].items():
        stage, _, _type = v

        if stage == Stage.OUTPUT:
            return _type
        
hidden_reg_df["_output_type"] = hidden_reg_df.algorithm.apply(get_output_type)
hidden_reg_df
output_agg_df = hidden_reg_df[hidden_reg_df.columns.difference(['algorithm', "_type"])].groupby(['_output_type']).mean()
output_agg_df

In [None]:
output_agg_df[["mpnn_f1_output", "specformer_f1_output", "spectralmpnn_f1_output"]]

In [None]:
output_agg_df[["specformer_effect", "spectralmpnn_effect"]]

# SpectralMPNN x MPNN

In [1]:
import pandas as pd

comparison_df = pd.read_csv("../results/spectralmpnn2.csv", sep=";")
comparison_df

Unnamed: 0,algorithm,mpnn_0,mpnn_1,mpnn_2,mpnn_3,mpnn_4,spectralmpnn_0,spectralmpnn_1,spectralmpnn_2,spectralmpnn_3,spectralmpnn_4,chebconv_0,chebconv_1,chebconv_2,chebconv_3,chebconv_4
0,activity_selector,0.914,0.8382,0.9242,0.9583,0.8444,0.7448,0.8457,0.7909,0.9217,0.919,0.8648,0.8528,0.8782,0.8881,0.9088
1,articulation_points,0.9703,0.9833,0.9598,0.8481,0.8996,0.9703,0.9833,0.9598,0.8481,0.8996,0.9795,0.9574,0.9335,0.8505,0.8268
2,bellman_ford,0.9868,0.981,0.9731,0.9785,0.9814,0.9541,0.9717,0.9731,0.9663,0.9746,0.9409,0.9521,0.9487,0.9463,0.9644
3,bfs,0.998,0.9863,0.9932,0.9951,0.9961,0.9971,1.0,0.9917,0.998,0.998,0.9995,0.9995,0.9917,0.9883,0.9526
4,binary_search,0.4063,0.4063,0.1875,0.125,0.25,0.375,0.1875,0.2188,0.3125,0.1875,0.3438,0.2813,0.1563,0.5,0.25
5,bridges,0.8406,0.5379,0.9055,0.9989,0.7142,0.8537,0.9012,0.7029,0.9012,0.7919,0.9033,0.7269,0.333,0.7387,0.2118
6,bubble_sort,0.0659,0.4014,0.0889,0.3047,0.2852,0.4888,0.597,0.4267,0.3215,0.4888,0.5444,0.4438,0.4312,0.2466,0.5444
7,dag_shortest_path,0.9829,0.9917,0.9927,0.9888,0.9912,0.8267,0.9849,0.8853,0.791,0.8057,0.9585,0.9878,0.9902,0.9595,0.9897
8,dfs,0.1011,0.2881,0.1436,0.3438,0.0986,0.1987,0.2461,0.0591,0.1855,0.2139,0.0815,0.1729,0.311,0.3052,0.311
9,dijkstra,0.9727,0.9761,0.9795,0.9858,0.9868,0.9775,0.979,0.9663,0.9761,0.9814,0.9668,0.9521,0.9678,0.9727,0.9717


In [2]:
mpnn_results_list = comparison_df.apply(lambda x: [x[f"mpnn_{i}"] for i in range(5)], axis=1)
mpnn_results_list = mpnn_results_list.set_axis(comparison_df["algorithm"])

spectralmpnn_results_list = comparison_df.apply(lambda x: [x[f"spectralmpnn_{i}"] for i in range(5)], axis=1)
spectralmpnn_results_list = spectralmpnn_results_list.set_axis(comparison_df["algorithm"])

chebconv_results_list = comparison_df.apply(lambda x: [x[f"chebconv_{i}"] for i in range(5)], axis=1)
chebconv_results_list = chebconv_results_list.set_axis(comparison_df["algorithm"])

In [3]:
import numpy as np

comparison_df["mpnn_avg"] = mpnn_results_list.apply(lambda x: sum(x)/len(x)).values
comparison_df["spectralmpnn_avg"] = spectralmpnn_results_list.apply(lambda x: sum(x)/len(x)).values
comparison_df["chebconv_avg"] = chebconv_results_list.apply(lambda x: sum(x)/len(x)).values

comparison_df[["mpnn_avg", "spectralmpnn_avg", "chebconv_avg"]]

Unnamed: 0,mpnn_avg,spectralmpnn_avg,chebconv_avg
0,0.89582,0.84442,0.87854
1,0.93222,0.93222,0.90954
2,0.98016,0.96796,0.95048
3,0.99374,0.99696,0.98632
4,0.27502,0.25626,0.30628
5,0.79942,0.83018,0.58274
6,0.22922,0.46456,0.44208
7,0.98946,0.85872,0.97714
8,0.19504,0.18066,0.23632
9,0.98018,0.97606,0.96622


In [4]:
type_dict = {
    "divide_and_conquer": ["find_maximum_subarray_kadane"],
    "dynamic_programming": ["matrix_chain_order", "lcs_length", "optimal_bst"],
    "geometry": ["segments_intersect", "graham_scan", "jarvis_march"],
    "graphs": ["dfs", "bfs", "topological_sort", "articulation_points", "bridges", "strongly_connected_components", "mst_kruskal", "mst_prim", "bellman_ford", "dijkstra", "dag_shortest_paths", "floyd_warshall"],
    "greedy": ["activity_selector", "task_scheduling"], 
    "searching": ["minimum", "binary_search", "quickselect"],
    "sorting": ["insertion_sort", "bubble_sort", "heapsort", "quicksort"],
    "strings": ["naive_string_matcher", "kmp_matcher"]
}

def get_algo_type(algo):
    for _type in type_dict.keys():
        print
        if algo in type_dict[_type]:
            return _type
        
comparison_df["_type"] = comparison_df.algorithm.apply(get_algo_type)
agg_df = comparison_df[comparison_df.columns.difference(['algorithm'])].groupby(['_type']).mean()
agg_df[["mpnn_avg", "spectralmpnn_avg", "chebconv_avg"]]

Unnamed: 0_level_0,mpnn_avg,spectralmpnn_avg,chebconv_avg
_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
divide_and_conquer,0.15,0.23126,0.25938
dynamic_programming,0.121873,0.116173,0.121373
geometry,0.9751,0.95376,0.96488
graphs,0.753949,0.708325,0.683909
greedy,0.85347,0.86896,0.8548
searching,0.414593,0.45824,0.429187
sorting,0.167565,0.39872,0.304055
strings,0.28128,0.36063,0.25002


In [None]:
# MPNN X SpectralMPNN

from scipy.stats import mannwhitneyu

def apply_mannwhitneyu(ls1, ls2):
    avg1 = sum(ls1)/len(ls1)
    avg2 = sum(ls2)/len(ls2)

    x, y = (ls1, ls2) if avg1 > avg2 else (ls2, ls1)
    
    res = mannwhitneyu(x, y, alternative='greater')
    
    gt_avg = 1 if avg1 > avg2 else 2
    return gt_avg if res.pvalue < 0.05 else 0

mannwhitneyu_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_mannwhitneyu(mpnn_results_list[algo], spectralmpnn_results_list[algo])))
comparison_df["result"] = mannwhitneyu_result.apply(lambda x: "tie" if x == 0 else ("mpnn" if x == 1 else "spectralmpnn"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,tie
1,articulation_points,tie
2,bellman_ford,mpnn
3,bfs,tie
4,binary_search,tie
5,bridges,tie
6,bubble_sort,spectralmpnn
7,dag_shortest_path,mpnn
8,dfs,tie
9,dijkstra,tie


In [None]:
# MPNN X ChebConv

mannwhitneyu_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_mannwhitneyu(mpnn_results_list[algo], chebconv_results_list[algo])))
comparison_df["result"] = mannwhitneyu_result.apply(lambda x: "tie" if x == 0 else ("mpnn" if x == 1 else "chebconv"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,tie
1,articulation_points,tie
2,bellman_ford,mpnn
3,bfs,tie
4,binary_search,tie
5,bridges,tie
6,bubble_sort,chebconv
7,dag_shortest_path,tie
8,dfs,tie
9,dijkstra,mpnn


In [None]:
# SpectralMPNN X ChebConv

mannwhitneyu_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_mannwhitneyu(chebconv_results_list[algo], spectralmpnn_results_list[algo])))
comparison_df["result"] = mannwhitneyu_result.apply(lambda x: "tie" if x == 0 else ("chebconv" if x == 1 else "spectralmpnn"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,tie
1,articulation_points,tie
2,bellman_ford,spectralmpnn
3,bfs,tie
4,binary_search,tie
5,bridges,tie
6,bubble_sort,tie
7,dag_shortest_path,chebconv
8,dfs,tie
9,dijkstra,spectralmpnn


In [9]:
# MPNN X SpectralMPNN

def apply_stdtest(ls1, ls2):
    avg1 = np.mean(ls1)
    avg2 = np.mean(ls2)

    x, y = (ls1, ls2) if avg1 > avg2 else (ls2, ls1)

    std = np.std(x)
    
    gt_avg = 1 if avg1 > avg2 else 2
    return gt_avg if np.mean(x) - std > np.mean(y) else 0

stdtest_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_stdtest(mpnn_results_list[algo], spectralmpnn_results_list[algo])))
comparison_df["result"] = stdtest_result.apply(lambda x: "tie" if x == 0 else ("mpnn" if x == 1 else "spectralmpnn"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,mpnn
1,articulation_points,tie
2,bellman_ford,mpnn
3,bfs,spectralmpnn
4,binary_search,tie
5,bridges,tie
6,bubble_sort,spectralmpnn
7,dag_shortest_path,mpnn
8,dfs,tie
9,dijkstra,tie


In [10]:
# MPNN X ChebConv

mannwhitneyu_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_stdtest(mpnn_results_list[algo], chebconv_results_list[algo])))
comparison_df["result"] = mannwhitneyu_result.apply(lambda x: "tie" if x == 0 else ("mpnn" if x == 1 else "chebconv"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,tie
1,articulation_points,tie
2,bellman_ford,mpnn
3,bfs,mpnn
4,binary_search,tie
5,bridges,mpnn
6,bubble_sort,chebconv
7,dag_shortest_path,mpnn
8,dfs,tie
9,dijkstra,mpnn


In [11]:
# SpectralMPNN X ChebConv

from scipy.stats import mannwhitneyu

def apply_mannwhitneyu(ls1, ls2):
    avg1 = sum(ls1)/len(ls1)
    avg2 = sum(ls2)/len(ls2)

    x, y = (ls1, ls2) if avg1 > avg2 else (ls2, ls1)
    
    res = mannwhitneyu(x, y, alternative='greater')
    
    gt_avg = 1 if avg1 > avg2 else 2
    return gt_avg if res.pvalue < 0.05 else 0

mannwhitneyu_result = pd.Series(comparison_df["algorithm"].apply(lambda algo: apply_stdtest(spectralmpnn_results_list[algo], chebconv_results_list[algo])))
comparison_df["result"] = mannwhitneyu_result.apply(lambda x: "tie" if x == 0 else ("spectralmpnn" if x == 1 else "chebconv"))
comparison_df[["algorithm", "result"]]

Unnamed: 0,algorithm,result
0,activity_selector,chebconv
1,articulation_points,tie
2,bellman_ford,spectralmpnn
3,bfs,spectralmpnn
4,binary_search,tie
5,bridges,spectralmpnn
6,bubble_sort,tie
7,dag_shortest_path,chebconv
8,dfs,tie
9,dijkstra,spectralmpnn
