# Calculate val and test F1 of a model trained on a given algorithm with a given hidden regularization weight

In [None]:
from algo_reasoning.src.models.network import EncodeProcessDecode
from algo_reasoning.src.lightning.AlgorithmicReasoningTask import AlgorithmicReasoningTask 
from algo_reasoning.src.specs import CLRS_30_ALGS
from algo_reasoning.src.losses.AlgorithmicReasoningLoss import AlgorithmicReasoningLoss
from algo_reasoning.src.data import OriginalCLRSDataset, CLRSSampler, collate
from algo_reasoning.src.sampler import CLRSDataset

import lightning as L
from torch.utils.data import DataLoader, get_worker_info
from pathlib import Path
import yaml

In [None]:
checkpoints_path = "../checkpoints/"

def load_module(algorithm, hidden_reg, checkpoints_dir=None):
    """Restore the Lightning task trained on `algorithm` with weight `hidden_reg`.

    Args:
        algorithm: Algorithm name; also the sub-directory of the checkpoint
            root that is searched.
        hidden_reg: Regularization weight as it is spelled in the checkpoint
            file name (e.g. "0.1"); it is also converted to float for the loss.
        checkpoints_dir: Optional checkpoint root. Defaults to the
            module-level `checkpoints_path`.

    Returns:
        The object produced by `AlgorithmicReasoningTask.load_from_checkpoint`.

    Raises:
        FileNotFoundError: If no entry under the algorithm directory has a
            name containing `hidden_reg=<hidden_reg>`.
    """
    root = Path(checkpoints_path if checkpoints_dir is None else checkpoints_dir)
    dir_path = root / algorithm

    pattern = f"*hidden_reg={hidden_reg}*"
    # glob() yields entries in arbitrary order; sort so the pick is reproducible
    # when more than one checkpoint matches.
    matches = sorted(dir_path.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"No checkpoint matching '{pattern}' found in '{dir_path}'"
        )
    ckpt_path = matches[0]

    model = EncodeProcessDecode([algorithm])
    loss_fn = AlgorithmicReasoningLoss(reg_weight=float(hidden_reg))

    model_hidden = AlgorithmicReasoningTask.load_from_checkpoint(ckpt_path, model=model, loss_fn=loss_fn)

    return model_hidden

def f1_val_test(algorithm, hidden_reg):
    """Evaluate one checkpoint on the "val" and "test" splits and return both F1 scores.

    The task is obtained through `load_module(algorithm, hidden_reg)` and run
    with `Trainer.test` on a data loader built for each split from
    "../tmp/CLRS30".

    Returns:
        A dict with keys "val_metrics" and "test_metrics", each holding the
        "test_f1" entry of the first result reported by `Trainer.test`.
    """
    task = load_module(algorithm, hidden_reg)

    evaluator = L.Trainer(devices=1, accelerator="gpu", use_distributed_sampler=False)

    # Build both loaders up front (val first, then test), mirroring the
    # construction order before any evaluation starts.
    loaders = []
    for split in ("val", "test"):
        dataset = OriginalCLRSDataset([algorithm], split, "../tmp/CLRS30")
        batch_sampler = CLRSSampler(dataset, algorithms=[algorithm], batch_size=8, seed=7)
        loaders.append(
            DataLoader(
                dataset,
                batch_sampler=batch_sampler,
                num_workers=8,
                persistent_workers=True,
                collate_fn=collate,
            )
        )

    # Trainer.test is used for both splits, so the metric is logged as
    # "test_f1" in either case.
    val_f1, test_f1 = (
        evaluator.test(task, loader, verbose=False)[0]["test_f1"]
        for loader in loaders
    )

    return {"val_metrics": val_f1, "test_metrics": test_f1}

In [None]:
from algo_reasoning.src.specs import CLRS_30_ALGS
import pandas as pd

# One result list per regularization weight; entries follow CLRS_30_ALGS order.
metrics_0, metrics_1, metrics_5 = [], [], []
runs = (
    (metrics_0, "0.0"),
    (metrics_1, "0.1"),
    (metrics_5, "0.5"),
)
for algo in CLRS_30_ALGS:
    print(algo)
    for bucket, reg_weight in runs:
        bucket.append(f1_val_test(algo, reg_weight))

In [None]:
algos = pd.Series(CLRS_30_ALGS)

# Each metrics_* list holds dicts with "val_metrics" / "test_metrics";
# pull every split out into its own Series, one per regularization weight.
metrics_0_val, metrics_1_val, metrics_5_val = (
    pd.Series(records).map(lambda record: record["val_metrics"])
    for records in (metrics_0, metrics_1, metrics_5)
)

metrics_0_test, metrics_1_test, metrics_5_test = (
    pd.Series(records).map(lambda record: record["test_metrics"])
    for records in (metrics_0, metrics_1, metrics_5)
)

In [None]:
# Column layout: the algorithm name, then a val/test pair per regularization weight.
score_columns = {
    "0.0_val": metrics_0_val,
    "0.0_test": metrics_0_test,
    "0.1_val": metrics_1_val,
    "0.1_test": metrics_1_test,
    "0.5_val": metrics_5_val,
    "0.5_test": metrics_5_test,
}
hidden_reg_df = pd.DataFrame({"algorithm": algos, **score_columns})
hidden_reg_df = hidden_reg_df.set_index("algorithm")

# Persist the table with ";" as the field separator.
hidden_reg_df.to_csv("../results/hidden_reg.csv", sep=";")

# Verify effect of Hidden Similarity Regularization on Models

In [None]:
import pandas as pd

# Reload the saved scores; the CSV uses ";" as its field separator.
results_csv = "../results/hidden_reg.csv"
hidden_reg_df = pd.read_csv(results_csv, sep=";")
hidden_reg_df

# Comparing effect of Hidden Regularization to Algorithm Length

In [None]:
# NOTE(review): `load_algorithm_args` is not imported or defined in the visible
# cells -- confirm where it comes from before running this cell.
algorithm_args = load_algorithm_args("../algorithm_args/default.yaml")
hidden_reg_df["max_length"] = 0

for alg in hidden_reg_df.algorithm:
    print("Generating sample for: ", alg)
    algorithms = [alg]
    nb_nodes = 64
    ds = CLRSDataset(algorithms, nb_nodes, 1, 1000, seed=7, algorithms_args=algorithm_args)
    # Only the scalar `max_length` is read from the sample, so it is left where
    # the dataset produced it; the previous `.to(device=device)` referenced a
    # `device` name that is never defined in this notebook.
    obj = next(iter(ds))

    hidden_reg_df.loc[hidden_reg_df.algorithm == alg, "max_length"] = obj.max_length.item()

In [None]:
# Effect of regularization: test F1 at a given weight minus test F1 at weight 0.0.
baseline_test = hidden_reg_df["0.0_test"]
reg_1_effect = hidden_reg_df["0.1_test"].sub(baseline_test)
reg_5_effect = hidden_reg_df["0.5_test"].sub(baseline_test)

# OOD gap: val F1 minus test F1 at the same regularization weight.
reg_0_ood_gap = hidden_reg_df["0.0_val"].sub(hidden_reg_df["0.0_test"])
reg_1_ood_gap = hidden_reg_df["0.1_val"].sub(hidden_reg_df["0.1_test"])
reg_5_ood_gap = hidden_reg_df["0.5_val"].sub(hidden_reg_df["0.5_test"])

# Attach the derived columns in a fixed order.
derived_columns = {
    "reg_0.1_effect": reg_1_effect,
    "reg_0.5_effect": reg_5_effect,
    "reg_0.0_ood_gap": reg_0_ood_gap,
    "reg_0.1_ood_gap": reg_1_ood_gap,
    "reg_0.5_ood_gap": reg_5_ood_gap,
}
for column_name, column_values in derived_columns.items():
    hidden_reg_df[column_name] = column_values

In [None]:
# Display the per-algorithm change in test F1 relative to the 0.0 weight.
hidden_reg_df[["reg_0.1_effect", "reg_0.5_effect"]]

In [None]:
# Pairwise correlations between every column except the algorithm name.
# Index.difference also returns the remaining labels in sorted order.
non_name_columns = hidden_reg_df.columns.difference(['algorithm'])
_corr = hidden_reg_df[non_name_columns].corr()

_corr

# Effect by Algorithm Type

In [None]:
# Maps each algorithm family to the algorithm names that belong to it.
type_dict = {
    "divide_and_conquer": ["find_maximum_subarray_kadane"],
    "dynamic_programming": ["matrix_chain_order", "lcs_length", "optimal_bst"],
    "geometry": ["segments_intersect", "graham_scan", "jarvis_march"],
    "graphs": ["dfs", "bfs", "topological_sort", "articulation_points", "bridges", "strongly_connected_components", "mst_kruskal", "mst_prim", "bellman_ford", "dijkstra", "dag_shortest_paths", "floyd_warshall"],
    "greedy": ["activity_selector", "task_scheduling"],
    "searching": ["minimum", "binary_search", "quickselect"],
    "sorting": ["insertion_sort", "bubble_sort", "heapsort", "quicksort"],
    "strings": ["naive_string_matcher", "kmp_matcher"]
}

def get_algo_type(algo):
    """Return the key of `type_dict` whose list contains `algo`.

    Returns None when `algo` is not listed under any family.
    """
    for _type, members in type_dict.items():
        if algo in members:
            return _type
    return None
        
# Tag every row with its algorithm family, then average all remaining
# columns within each family.
hidden_reg_df["_type"] = hidden_reg_df["algorithm"].apply(get_algo_type)
value_columns = hidden_reg_df.columns.difference(['algorithm'])
grouped_by_type = hidden_reg_df[value_columns].groupby(['_type'])
agg_df = grouped_by_type.mean()
agg_df

In [None]:
# Display the per-family mean of the regularization effect columns.
agg_df[["reg_0.1_effect", "reg_0.5_effect"]]

In [None]:
# Pairwise correlations between the per-family mean columns.
agg_df.corr()

# Looking at average OOD Gap

In [None]:
# Display the per-algorithm val-minus-test F1 gap for each regularization weight.
hidden_reg_df[["reg_0.0_ood_gap", "reg_0.1_ood_gap", "reg_0.5_ood_gap"]]

In [None]:
# Display the per-family mean val-minus-test F1 gap for each regularization weight.
agg_df[["reg_0.0_ood_gap", "reg_0.1_ood_gap", "reg_0.5_ood_gap"]]