# Verify effect of Hidden Similarity Regularization on Models

In [None]:
import pandas as pd

# Read the semicolon-separated results table and show it as the cell output.
results_path = "../results/hidden_reg.csv"
hidden_reg_df = pd.read_csv(results_path, sep=";")
hidden_reg_df

# Load Datasets

In [None]:
import torch
import yaml
from algo_reasoning.src.sampler import CLRSDataset

# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [None]:
def load_algorithm_args(args_file):
    """Load algorithm arguments from a YAML file.

    Args:
        args_file: Path of the YAML file to read.

    Returns:
        The parsed document, exactly as returned by ``yaml.safe_load``.
    """
    # Pin the encoding so parsing does not depend on the platform's locale
    # default (which is not UTF-8 everywhere).
    with open(args_file, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Relating the Effect of Hidden Regularization to Algorithm Length

In [None]:
# For every algorithm in the results table, generate one sample and store its
# `max_length` next to the scores.
algorithm_args = load_algorithm_args("../algorithm_args/default.yaml")

# A scalar is broadcast to every row whatever the frame's index looks like,
# whereas a freshly built Series would be aligned on index labels.
hidden_reg_df["max_length"] = 0

# Loop-invariant size argument handed to CLRSDataset.
nb_nodes = 64

# One sample per distinct algorithm is enough: the seed is fixed, so repeated
# rows for the same algorithm would receive the same value anyway.
for alg in hidden_reg_df.algorithm.unique():
    print("Generating sample for: ", alg)
    algorithms = [alg]
    # NOTE(review): the positional 1 and 1000 are presumably batch size and
    # number of samples -- confirm against the CLRSDataset signature.
    ds = CLRSDataset(algorithms, nb_nodes, 1, 1000, seed=7, algorithms_args=algorithm_args)
    obj = next(iter(ds)).to(device=device)

    hidden_reg_df.loc[hidden_reg_df.algorithm == alg, "max_length"] = obj.max_length.item()

In [None]:
# Test score of the "0.0" run, used as the reference for the other two runs.
baseline_test = hidden_reg_df["0.0_test"]

# Change in test score of the "0.1" and "0.5" runs relative to the "0.0" run.
reg_1_effect = hidden_reg_df["0.1_test"] - baseline_test
reg_5_effect = hidden_reg_df["0.5_test"] - baseline_test

# Validation score minus test score, separately for each run.
ood_gap_reg_0 = hidden_reg_df["0.0_val"] - baseline_test
ood_gap_reg_1 = hidden_reg_df["0.1_val"] - hidden_reg_df["0.1_test"]
ood_gap_reg_5 = hidden_reg_df["0.5_val"] - hidden_reg_df["0.5_test"]

# Attach the derived series to the table, in this column order.
derived_columns = (
    ("reg_0.1_effect", reg_1_effect),
    ("reg_0.5_effect", reg_5_effect),
    ("ood_gap_0.0", ood_gap_reg_0),
    ("ood_gap_0.1", ood_gap_reg_1),
    ("ood_gap_0.5", ood_gap_reg_5),
)
for column_name, column_values in derived_columns:
    hidden_reg_df[column_name] = column_values

In [None]:
# Column-wise mean of the two test-score deltas over all rows.
effect_columns = ["reg_0.1_effect", "reg_0.5_effect"]
hidden_reg_df[effect_columns].mean()

In [None]:
# Column-wise mean of the raw test scores for the three runs.
test_columns = ["0.0_test", "0.1_test", "0.5_test"]
hidden_reg_df[test_columns].mean()

In [None]:
# Column-wise mean of the validation-minus-test gaps for the three runs.
ood_gap_columns = ["ood_gap_0.0", "ood_gap_0.1", "ood_gap_0.5"]
hidden_reg_df[ood_gap_columns].mean()

In [None]:
# Pairwise correlation between all numeric columns. Selecting by dtype, rather
# than only dropping "algorithm", keeps this cell re-runnable once the frame
# also holds other text columns (pandas >= 2.0 no longer drops them silently
# inside corr()). Columns are sorted so the output order matches what
# `columns.difference(...)` produced.
numeric_df = hidden_reg_df.select_dtypes(include="number").sort_index(axis=1)
_corr = numeric_df.corr()

# Correlation of every numeric column with the sampled `max_length`.
_corr["max_length"]

# Effect by Algorithm Type

In [None]:
# Algorithm family -> names of the algorithms assigned to that family.
type_dict = {
    "divide_and_conquer": ["find_maximum_subarray_kadane"],
    "dynamic_programming": ["matrix_chain_order", "lcs_length", "optimal_bst"],
    "geometry": ["segments_intersect", "graham_scan", "jarvis_march"],
    "graphs": ["dfs", "bfs", "topological_sort", "articulation_points", "bridges", "strongly_connected_components", "mst_kruskal", "mst_prim", "bellman_ford", "dijkstra", "dag_shortest_paths", "floyd_warshall"],
    "greedy": ["activity_selector", "task_scheduling"],
    "searching": ["minimum", "binary_search", "quickselect"],
    "sorting": ["insertion_sort", "bubble_sort", "heapsort", "quicksort"],
    "strings": ["naive_string_matcher", "kmp_matcher"]
}

def get_algo_type(algo):
    """Return the family under which ``algo`` is listed in ``type_dict``.

    Args:
        algo: Algorithm name, e.g. ``"dfs"``.

    Returns:
        The first key of ``type_dict`` whose list contains ``algo``, or
        ``None`` when the name is not listed anywhere.
    """
    for _type, members in type_dict.items():
        if algo in members:
            return _type
    # Unknown names are reported as None rather than raising.
    return None
        
# Tag every row with its algorithm family, then average each column per family.
hidden_reg_df["_type"] = hidden_reg_df.algorithm.apply(get_algo_type)

# numeric_only=True keeps the aggregation working when the frame also carries
# other text columns (for instance when this cell is re-run after later cells
# have added some); pandas >= 2.0 raises on them otherwise.
agg_df = (
    hidden_reg_df[hidden_reg_df.columns.difference(['algorithm'])]
    .groupby(['_type'])
    .mean(numeric_only=True)
)
agg_df

In [None]:
# Per-family means of the validation-minus-test gaps only.
agg_df.loc[:, ["ood_gap_0.0", "ood_gap_0.1", "ood_gap_0.5"]]

In [None]:
# Pearson correlation (the pandas default) between the per-family column means.
agg_df.corr(method="pearson")

# Effect by Output Type

In [None]:
from algo_reasoning.src.specs import SPECS, Stage

def get_output_type(algo):
    """Return the type of the first OUTPUT-stage entry in ``SPECS[algo]``.

    Args:
        algo: Key into ``SPECS``.

    Returns:
        The third element of the first spec tuple whose stage equals
        ``Stage.OUTPUT``, or ``None`` when no entry has that stage.
    """
    # Only the spec tuples matter here; the keys are not needed.
    for stage, _, _type in SPECS[algo].values():
        if stage == Stage.OUTPUT:
            return _type
    return None
        
# Tag every row with the type of its algorithm's output, then average each
# column per output type.
hidden_reg_df["_output_type"] = hidden_reg_df.algorithm.apply(get_output_type)

# "_type" is dropped explicitly as in the original; numeric_only=True
# additionally guards against any other text column in the frame.
output_agg_df = (
    hidden_reg_df[hidden_reg_df.columns.difference(['algorithm', "_type"])]
    .groupby(['_output_type'])
    .mean(numeric_only=True)
)
output_agg_df

In [None]:
# Per-output-type means of the validation-minus-test gaps only.
output_gap_columns = ["ood_gap_0.0", "ood_gap_0.1", "ood_gap_0.5"]
output_agg_df[output_gap_columns]

In [None]:
# Per-output-type means of the raw test scores only.
output_test_columns = ["0.0_test", "0.1_test", "0.5_test"]
output_agg_df[output_test_columns]