# Verify effect of Hidden Similarity Regularization on Models

In [1]:
import pandas as pd

# Load the semicolon-separated results table.
# NOTE(review): the reg_* columns presumably hold one score per algorithm for
# each regularization weight (0, 0.1, 0.5) -- confirm against the CSV producer.
hidden_reg_df = pd.read_csv("../results/hidden_reg.csv", sep=";")
hidden_reg_df

Unnamed: 0,algorithm,reg_0,reg_0.1,reg_0.5
0,activity_selector,0.9098,0.913,0.8718
1,articulation_points,0.9976,0.5076,0.97595
2,bellman_ford,0.9663,0.9829,0.981
3,bfs,0.999,0.9961,0.9932
4,binary_search,0.2813,0.3438,0.4063
5,bridges,0.8704,0.8751,0.727
6,bubble_sort,0.1254,0.2236,0.2896
7,dag_shortest_paths,0.9917,0.9888,0.9868
8,dfs,0.1909,0.5498,0.1914
9,dijkstra,0.9888,0.9819,0.9863


# Load Datasets

In [2]:
import torch
import yaml
from algo_reasoning.src.sampler import CLRSDataset

# Use CUDA when torch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_algorithm_args(args_file):
    """Parse a YAML file and return its contents.

    Args:
        args_file: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.
    """
    # The file is closed when the ``with`` block exits, including on return.
    with open(args_file) as stream:
        return yaml.safe_load(stream)

# Comparing the Effect of Hidden Regularization with Algorithm Length

In [4]:
algorithm_args = load_algorithm_args("../algorithm_args/default.yaml")

# Loop-invariant graph/problem size passed to every dataset below.
nb_nodes = 64

# Gather one max_length per algorithm first and attach the column in a single
# step afterwards. This avoids pre-filling a placeholder column (a Series
# assignment is aligned on index labels, so it silently produces NaN when the
# frame does not have a default RangeIndex) and avoids re-scanning the whole
# frame with a boolean mask on every iteration.
max_length_by_algorithm = {}

for alg in hidden_reg_df.algorithm:
    print("Generating sample for: ", alg)
    # NOTE(review): the positional arguments 1 and 1000 are forwarded unchanged;
    # their meaning is defined by CLRSDataset -- confirm against its signature.
    ds = CLRSDataset([alg], nb_nodes, 1, 1000, seed=7, algorithms_args=algorithm_args)
    # Only the first item yielded by the dataset is inspected.
    obj = next(iter(ds)).to(device=device)

    max_length_by_algorithm[alg] = obj.max_length.item()

# Row-wise lookup by algorithm name; every name was inserted by the loop above.
hidden_reg_df["max_length"] = hidden_reg_df.algorithm.map(max_length_by_algorithm)

Generating sample for:  activity_selector
Generating sample for:  articulation_points
Generating sample for:  bellman_ford
Generating sample for:  bfs
Generating sample for:  binary_search
Generating sample for:  bridges
Generating sample for:  bubble_sort
Generating sample for:  dag_shortest_paths
Generating sample for:  dfs
Generating sample for:  dijkstra
Generating sample for:  find_maximum_subarray_kadane
Generating sample for:  floyd_warshall
Generating sample for:  graham_scan
Generating sample for:  heapsort
Generating sample for:  insertion_sort
Generating sample for:  kmp_matcher
Generating sample for:  lcs_length
Generating sample for:  matrix_chain_order
Generating sample for:  minimum
Generating sample for:  mst_kruskal
Generating sample for:  mst_prim
Generating sample for:  naive_string_matcher
Generating sample for:  optimal_bst
Generating sample for:  quickselect
Generating sample for:  quicksort
Generating sample for:  segments_intersect
Generating sample for:  strong

In [5]:
# Difference between each regularized column and the "reg_0" column; a positive
# value means the regularized column is the larger one.
# NOTE(review): "reg_0" is presumably the no-regularization baseline -- confirm.
reg_0_scores = hidden_reg_df["reg_0"]
reg_1_effect = hidden_reg_df["reg_0.1"].sub(reg_0_scores)
reg_5_effect = hidden_reg_df["reg_0.5"].sub(reg_0_scores)

# Assigned in this order so the new columns keep the same position in the frame.
hidden_reg_df["reg_0.1_effect"] = reg_1_effect
hidden_reg_df["reg_0.5_effect"] = reg_5_effect

In [6]:
# Pairwise correlation between every column except the algorithm name.
# ``Index.difference`` also returns the remaining labels in sorted order, which
# fixes the row/column order of the resulting matrix.
columns_without_name = hidden_reg_df.columns.difference(['algorithm'])
_corr = hidden_reg_df.loc[:, columns_without_name].corr()

_corr

Unnamed: 0,max_length,reg_0,reg_0.1,reg_0.1_effect,reg_0.5,reg_0.5_effect
max_length,1.0,-0.001009,-0.048073,-0.126807,0.040936,0.127936
reg_0,-0.001009,1.0,0.938425,-0.361715,0.953275,-0.392784
reg_0.1,-0.048073,0.938425,1.0,-0.017353,0.927981,-0.266913
reg_0.1_effect,-0.126807,-0.361715,-0.017353,1.0,-0.254671,0.416475
reg_0.5,0.040936,0.953275,0.927981,-0.254671,1.0,-0.096606
reg_0.5_effect,0.127936,-0.392784,-0.266913,0.416475,-0.096606,1.0


# Effect by Algorithm Type

In [7]:
# Category name -> algorithm names listed under that category.
type_dict = {
    "divide_and_conquer": [
        "find_maximum_subarray_kadane",
    ],
    "dynamic_programming": [
        "matrix_chain_order",
        "lcs_length",
        "optimal_bst",
    ],
    "geometry": [
        "segments_intersect",
        "graham_scan",
        "jarvis_march",
    ],
    "graphs": [
        "dfs",
        "bfs",
        "topological_sort",
        "articulation_points",
        "bridges",
        "strongly_connected_components",
        "mst_kruskal",
        "mst_prim",
        "bellman_ford",
        "dijkstra",
        "dag_shortest_paths",
        "floyd_warshall",
    ],
    "greedy": [
        "activity_selector",
        "task_scheduling",
    ],
    "searching": [
        "minimum",
        "binary_search",
        "quickselect",
    ],
    "sorting": [
        "insertion_sort",
        "bubble_sort",
        "heapsort",
        "quicksort",
    ],
    "strings": [
        "naive_string_matcher",
        "kmp_matcher",
    ],
}

def get_algo_type(algo):
    """Return the category under which ``algo`` is listed in ``type_dict``.

    Args:
        algo: Algorithm name to look up.

    Returns:
        The first key of ``type_dict`` whose list contains ``algo``, or
        ``None`` when no list contains it.
    """
    for _type, members in type_dict.items():
        if algo in members:
            return _type

    # Not listed under any category; callers receive None.
    return None
        
# Label every row with its category; names missing from type_dict get None.
hidden_reg_df["_type"] = hidden_reg_df.algorithm.apply(get_algo_type)

# Average every remaining column per category. The algorithm name is excluded
# from the selection; "_type" stays in because it is the grouping key.
columns_to_aggregate = hidden_reg_df.columns.difference(['algorithm'])
agg_df = hidden_reg_df.loc[:, columns_to_aggregate].groupby("_type").mean()
agg_df

Unnamed: 0_level_0,max_length,reg_0,reg_0.1,reg_0.1_effect,reg_0.5,reg_0.5_effect
_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
divide_and_conquer,64.0,0.1406,0.1719,0.0313,0.3281,0.1875
dynamic_programming,51.0,0.1244,0.103267,-0.021133,0.097767,-0.026633
geometry,62.5,0.9548,0.9731,0.0183,0.9062,-0.0486
graphs,360.0,0.782875,0.7834,0.000525,0.790129,0.007254
greedy,65.0,0.9136,0.89015,-0.02345,0.8679,-0.0457
searching,59.0,0.447933,0.4167,-0.031233,0.510467,0.062533
sorting,720.5,0.203475,0.162225,-0.04125,0.1808,-0.022675
strings,8.0,0.0625,0.20315,0.14065,0.12505,0.06255


In [8]:
# Correlation between the per-category means (one row per category), using
# pandas' default Pearson method spelled out explicitly.
agg_df.corr(method="pearson")

Unnamed: 0,max_length,reg_0,reg_0.1,reg_0.1_effect,reg_0.5,reg_0.5_effect
max_length,1.0,-0.063028,-0.133453,-0.419833,-0.129989,-0.25719
reg_0,-0.063028,1.0,0.988135,-0.315543,0.980033,-0.531258
reg_0.1,-0.133453,0.988135,1.0,-0.166056,0.976803,-0.48917
reg_0.1_effect,-0.419833,-0.315543,-0.166056,1.0,-0.257355,0.388718
reg_0.5,-0.129989,0.980033,0.976803,-0.257355,1.0,-0.352194
reg_0.5_effect,-0.25719,-0.531258,-0.48917,0.388718,-0.352194,1.0
