# STANDARD DATASETS 
###### Run the below cell to generate results for paper
###### NOTE: Paper generated results 1 run at a time and used a post processing script to average out the statistics

In [None]:
import os
import math
import statistics
import yaml
import torch

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"

from cnn.cnn_architectures import CNN
from sgcn.src.architectures import SGCN
from qgcn.qgcn_architectures import QGCN
from experiment import Experiment
# from gcn_architectures import Custom_GCN, GCN, GraphConvCN, GATConvCN, SGConvCN

# Empty cache
torch.cuda.empty_cache()

# current file directory
# NOTE: '__file__' is passed as a string literal (presumably because the real
# __file__ is not defined inside a notebook), so realpath() resolves it against
# the current working directory and dirname() yields that working directory.
current_file_dirpath = os.path.dirname(os.path.realpath('__file__'))

# device type for all models: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Report the selected device itself rather than the literal word "device".
print("using cuda: ", torch.cuda.is_available(), "device:", device)

# Model configuration for every standard dataset supported by the sweep.
# The three datasets share all settings except the number of input channels,
# the number of layers and the dataset name (which embeds the dataset key),
# so the entries are generated from those three varying values.
dataset_mapping = {
    dataset_key: {
        "label_dim": 1,
        "out_dim": 10,
        "dim_coor": 2,
        "in_channels": num_input_channels,
        "dataset_name": f"{dataset_key}_dataset_selfloops_True_edgeft_None_norm_True",
        "self_loops_included": True,
        "layers_num": depth,
        "model_dim": 32,
        "out_channels_1": 64,
        "hidden_channels": 32,
        "out_channels": 64,
        "cnn_kernel_size": 3,
        "cnn_stride": 1,
        "cnn_padding": 1,
        "qgcn_num_subkernels": 3 * 3,  # same size as CNN kernel size
        "is_dataset_homogenous": True,  # Homogenous means spatial location mask of the nodes do not change
    }
    # (dataset key, input channels, number of layers)
    for dataset_key, num_input_channels, depth in (
        ("mnist", 1, 3),
        ("fashionmnist", 1, 6),
        ("cifar10", 3, 9),
    )
}

"""
Define sweep parameters
"""
# Dataset splits to sweep over: number of train/test samples plus the batch size.
datasets = [
    {"train": train_count, "test": test_count, "batch_size": 16}
    for train_count, test_count in ((100, 20), (1000, 200), (10000, 1000))
]

# One row per sweep point: (learning rate, number of epochs, number of runs).
# The resulting lists are index-aligned: the sweep reads epochs[i] and runs[i]
# for the learning rate lrs[i].
lrs, epochs, runs = map(list, zip(
    (10,       100, 3),
    (5,        100, 3),
    (1,        150, 3),
    (0.5,      150, 3),
    (0.1,      300, 3),
    (0.05,     300, 3),
    (0.01,     400, 3),
    (0.005,    400, 3),
    (0.001,    500, 3),
    (0.0001,   600, 3),
    (0.00001,  800, 3),
    (0.00005,  800, 3),
    (0.000001, 800, 3),
))


"""
Helper function for collating results
Define function for handling collation
"""
def collate_stats(stats_name, max_stats, smoothened_stats):
  """Summarise per-run statistics for the CNN, SGCN and QGCN models.

  Args:
    stats_name: prefix used for every key of the returned dicts.
    max_stats: dict with keys "cnn", "sgcn" and "qgcn", each holding the
      per-run values summarised under the "*_of_maxs" keys.
    smoothened_stats: dict with the same keys, summarised under the
      "*_of_smaxs" keys.

  Returns:
    A (cnn, sgcn, qgcn) tuple of dicts. Each dict holds the maximum, mean and
    sample standard deviation of both inputs, rounded to 5 decimal places.

  Raises:
    ValueError: if any of the per-run sequences is empty.
  """
  summaries = []
  for model_key in ("cnn", "sgcn", "qgcn"):
    summary = {}
    for suffix, run_values in (("maxs", max_stats[model_key]),
                               ("smaxs", smoothened_stats[model_key])):
      summary[f"{stats_name}_max_of_{suffix}"] = round(max(run_values), 5)
      summary[f"{stats_name}_avg_of_{suffix}"] = round(statistics.mean(run_values), 5)
      # the sample standard deviation needs at least two runs; report 0 otherwise
      if len(run_values) <= 1:
        spread = 0
      else:
        spread = statistics.stdev(run_values)
      summary[f"{stats_name}_std_of_{suffix}"] = round(spread, 5)
    summaries.append(summary)

  # Return results in (cnn, sgcn, qgcn) order
  cnn_summary, sgcn_summary, qgcn_summary = summaries
  return cnn_summary, sgcn_summary, qgcn_summary


"""
SWEEPING Logic below
Loops through the different sweep parameters to train different models
"""
# Sweep driver: for every dataset split, dataset and learning rate, train the
# SGCN / CNN / QGCN models `num_runs` times, summarise the collected statistics
# and write them to a per-dataset YAML file after each learning rate.
for dataset_split in datasets: # loop over dataset splits
    # batch size is specific to the dataset split (falls back to 64 when absent)
    batch_size = dataset_split.get('batch_size', 64)
    num_train, num_test = dataset_split["train"], dataset_split["test"]

    # Inner loop goes over all datasets
    for selected_dataset, selected_dataset_config in dataset_mapping.items():
        # Prep experiment name and the path of the per-dataset YAML summary
        experiment_name = f"BATCH-RESULTS-ALL-DATASETS-{selected_dataset.capitalize()}_Summary"
        experiments_dir = os.path.join(os.path.dirname(os.path.realpath('__file__')), "Experiments")
        # Create the output directory up front: the summary write at the end of
        # each learning rate would fail if nothing else has created it by then.
        os.makedirs(experiments_dir, exist_ok=True)
        experiment_result_filepath = os.path.join(experiments_dir, f'{"_".join(experiment_name.split(" "))}.yaml')
        averaging_window_width = 0.05 # fraction -> 5%

        # Start from previously saved results (if any); entries for the keys
        # computed below are overwritten, all other entries are kept.
        results = {} # to hold results for saving
        if os.path.exists(experiment_result_filepath):
            with open(experiment_result_filepath, "r") as file_stream:
                # an empty file loads as None -> fall back to an empty dict
                results = dict(yaml.safe_load(file_stream) or {})

        # Load the required params for this dataset
        dataset_name           = selected_dataset_config["dataset_name"]
        self_loops_included    = selected_dataset_config["self_loops_included"]
        layers_num             = selected_dataset_config["layers_num"]
        model_dim              = selected_dataset_config["model_dim"]
        out_channels_1         = selected_dataset_config["out_channels_1"]
        dim_coor               = selected_dataset_config["dim_coor"]
        label_dim              = selected_dataset_config["label_dim"]
        out_dim                = selected_dataset_config["out_dim"]
        in_channels            = selected_dataset_config["in_channels"]
        hidden_channels        = selected_dataset_config["hidden_channels"]
        out_channels           = selected_dataset_config["out_channels"]
        cnn_kernel_size        = selected_dataset_config["cnn_kernel_size"]
        cnn_stride             = selected_dataset_config["cnn_stride"]
        cnn_padding            = selected_dataset_config["cnn_padding"]
        qgcn_num_subkernels    = selected_dataset_config["qgcn_num_subkernels"]
        is_dataset_homogenous  = selected_dataset_config["is_dataset_homogenous"]

        # Inner-Inner loop
        for i, lr in enumerate(lrs): # loop over learning rates
            optim_params = { "lr": lr }
            # epochs / runs are index-aligned with lrs
            num_epochs = epochs[i]
            num_runs = runs[i]
            # number of trailing epochs averaged for the "smoothened" statistics
            num_averaging_window = int(math.ceil(averaging_window_width * num_epochs))
            # create the key for hashing into results
            results_hash_key = f'train_{num_train}_test_{num_test}_lr_{lr}'
            results[results_hash_key] = {}
            # per-run stats, one list per model
            mean_train_loss = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_train_loss = { "cnn": [], "sgcn": [], "qgcn": []}
            max_train_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_train_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            max_test_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_test_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            for run in range(num_runs): # loop over num runs
                # initialize fresh models for every run
                sgcn_model = SGCN(dim_coor=dim_coor,
                                  out_dim=out_dim,
                                  input_features=in_channels, # label_dim,
                                  layers_num=layers_num,
                                  model_dim=model_dim,
                                  out_channels_1=out_channels_1)

                # cnn model
                cnn_model = CNN(out_dim=out_dim,
                                hidden_channels=hidden_channels,
                                in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=cnn_kernel_size,
                                stride=cnn_stride,
                                layers_num=layers_num,
                                padding=cnn_padding)

                # qgcn model
                qgcn_model = QGCN(dim_coor=dim_coor,
                                  out_dim=out_dim,
                                  in_channels=in_channels,
                                  hidden_channels=hidden_channels,
                                  out_channels=out_channels,
                                  layers_num=layers_num,
                                  num_kernels=qgcn_num_subkernels,
                                  self_loops_included=self_loops_included,
                                  is_dataset_homogenous=is_dataset_homogenous, # determines whether to apply caching for kernel masks
                                  apply_spatial_scalars=False, # SGCN-like behavior; refer to code and paper for more details
                                  initializer_model=cnn_model, # comment this out to have independent initializations
                                  device=device)

                # setup experiments to run
                experiment_id = f'_{run}_full_blown_exp_train_{num_train}_test_{num_test}_i_{i}_lr_{lr}_num_epochs_{num_epochs}'
                experiment = Experiment(sgcn_model = sgcn_model,
                                        qgcn_model = qgcn_model,
                                        cnn_model = cnn_model,
                                        optim_params = optim_params,
                                        base_path = "./",
                                        num_train = num_train,
                                        num_test = num_test,
                                        dataset_name = dataset_name,
                                        train_batch_size = batch_size,
                                        test_batch_size = batch_size,
                                        train_shuffle_data = True,
                                        test_shuffle_data = False,
                                        id = experiment_id) # mark this experiment ...

                # run the experiment ...
                experiment.run(num_epochs=num_epochs, eval_training_set=False) # specify num epochs ...

                # load collected stats during runs ...
                # (each triple is unpacked in cnn, qgcn, sgcn order)
                (train_cnn_loss_array, train_qgcn_loss_array, train_sgcn_loss_array, \
                 train_cnn_acc_array, train_qgcn_acc_array, train_sgcn_acc_array, \
                 test_cnn_acc_array, test_qgcn_acc_array, test_sgcn_acc_array) = experiment.load_cached_results() # only accuracies on train and test sets ...

                # group the per-epoch arrays by model so all models share one code path
                train_loss_arrays = {"cnn": train_cnn_loss_array, "sgcn": train_sgcn_loss_array, "qgcn": train_qgcn_loss_array}
                train_acc_arrays  = {"cnn": train_cnn_acc_array,  "sgcn": train_sgcn_acc_array,  "qgcn": train_qgcn_acc_array}
                test_acc_arrays   = {"cnn": test_cnn_acc_array,   "sgcn": test_sgcn_acc_array,   "qgcn": test_qgcn_acc_array}

                for model_key in ("cnn", "sgcn", "qgcn"):
                    model_train_loss = train_loss_arrays[model_key]
                    model_train_acc = train_acc_arrays[model_key]
                    model_test_acc = test_acc_arrays[model_key]

                    # whole-run stats: mean loss and best accuracies
                    mean_train_loss[model_key].append(statistics.mean(model_train_loss))
                    max_train_acc[model_key].append(max(model_train_acc))
                    max_test_acc[model_key].append(max(model_test_acc))

                    # smoothened stats: mean over the last `num_averaging_window` epochs
                    smoothened_train_loss[model_key].append(statistics.mean(model_train_loss[-num_averaging_window:]))
                    smoothened_train_acc[model_key].append(statistics.mean(model_train_acc[-num_averaging_window:]))
                    smoothened_test_acc[model_key].append(statistics.mean(model_test_acc[-num_averaging_window:]))

            # get collated stats
            train_loss_cnn_results, train_loss_sgcn_results, train_loss_qgcn_results = collate_stats("train_loss", mean_train_loss, smoothened_train_loss)
            train_acc_cnn_results,  train_acc_sgcn_results,  train_acc_qgcn_results  = collate_stats("train_acc", max_train_acc, smoothened_train_acc)
            test_acc_cnn_results,   test_acc_sgcn_results,   test_acc_qgcn_results   = collate_stats("test_acc", max_test_acc, smoothened_test_acc)
            all_cnn_stats  = {**train_loss_cnn_results,  **train_acc_cnn_results,  **test_acc_cnn_results}
            all_sgcn_stats = {**train_loss_sgcn_results, **train_acc_sgcn_results, **test_acc_sgcn_results}
            all_qgcn_stats = {**train_loss_qgcn_results, **train_acc_qgcn_results, **test_acc_qgcn_results}

            # save results into results obj
            results[results_hash_key]["cnn"] = all_cnn_stats
            results[results_hash_key]["sgcn"] = all_sgcn_stats
            results[results_hash_key]["qgcn"] = all_qgcn_stats

            # write the results to the YAML summary after every learning rate,
            # so results of a partially completed sweep are kept on disk
            with open(experiment_result_filepath, "w") as file_stream:
                yaml.dump(results, file_stream)


# CUSTOM DATASETS 
###### Run the below cell to generate results for paper
###### NOTE: Paper generated results 1 run at a time and used a post processing script to average out the statistics

In [None]:
import os
import math
import statistics
import yaml
import torch

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"

from cnn.cnn_architectures import CNN
from sgcn.src.architectures import SGCN
from qgcn.qgcn_architectures import QGCN
from experiment import Experiment
# from gcn_architectures import Custom_GCN, GCN, GraphConvCN, GATConvCN, SGConvCN

# Empty cache
torch.cuda.empty_cache()

# current file directory
# NOTE: '__file__' is passed as a string literal (presumably because the real
# __file__ is not defined inside a notebook), so realpath() resolves it against
# the current working directory and dirname() yields that working directory.
current_file_dirpath = os.path.dirname(os.path.realpath('__file__'))

# device type for all models: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Report the selected device itself rather than the literal word "device".
print("using cuda: ", torch.cuda.is_available(), "device:", device)

# Model configuration for every custom (Navier-Stokes) dataset supported by the sweep.
# The three entries are identical except for "dataset_name", which equals the
# mapping key, so they are generated from the list of dataset names.
# NOTE(review): "out_dim" is 10 for the "binary" dataset as well - confirm this is intended.
dataset_mapping = {
    dataset_key: {
        "label_dim": 1,
        "out_dim": 10,
        "dim_coor": 2,
        "in_channels": 1,
        "dataset_name": dataset_key,
        "self_loops_included": False,
        "layers_num": 3,
        "model_dim": 32,
        "out_channels_1": 64,
        "hidden_channels": 32,
        "out_channels": 64,
        "cnn_kernel_size": 3,
        "cnn_stride": 1,
        "cnn_padding": 1,
        "qgcn_num_subkernels": 11,
        "is_dataset_homogenous": True,  # Homogenous means spatial location mask of the nodes do not change
    }
    for dataset_key in (
        "navier_stokes_binary",
        "navier_stokes_denary_1",
        "navier_stokes_denary_2",
    )
}

"""
Define sweep parameters
"""
# Dataset splits to sweep over: number of train/test samples plus the batch size.
datasets = [
    {"train": train_count, "test": test_count, "batch_size": 16}
    for train_count, test_count in ((100, 20), (1000, 200), (10000, 1000))
]

# Learning rates to sweep over; every learning rate is trained for 100 epochs
# and repeated 3 times. epochs and runs stay index-aligned with lrs because the
# sweep reads epochs[i] and runs[i] for the learning rate lrs[i].
lrs = [
    10, 5, 1,
    0.5, 0.1, 0.05,
    0.01, 0.005, 0.001,
    0.0001, 0.00001, 0.00005,
    0.000001,
]
epochs = [100 for _ in lrs]
runs = [3 for _ in lrs]


"""
Helper function for collating results
Define function for handling collation
"""
def collate_stats(stats_name, max_stats, smoothened_stats):
  """Collate per-run statistics of the CNN, SGCN and QGCN models.

  Args:
    stats_name: prefix of every key in the returned dicts.
    max_stats: dict keyed by "cnn", "sgcn" and "qgcn"; each value is the
      sequence of per-run values reported under the "*_of_maxs" keys.
    smoothened_stats: dict with the same keys; reported under the
      "*_of_smaxs" keys.

  Returns:
    Tuple (cnn, sgcn, qgcn) of dicts with the max, mean and sample standard
    deviation of each input sequence, all rounded to 5 decimal places.

  Raises:
    ValueError: if one of the per-run sequences is empty.
  """
  def describe(run_values, suffix):
    # max / mean / std of one sequence; std is 0 when fewer than two runs exist
    return {
      f"{stats_name}_max_of_{suffix}": round(max(run_values), 5),
      f"{stats_name}_avg_of_{suffix}": round(statistics.mean(run_values), 5),
      f"{stats_name}_std_of_{suffix}": round(statistics.stdev(run_values) if len(run_values) > 1 else 0, 5),
    }

  # one merged summary per model, in (cnn, sgcn, qgcn) order
  cnn_summary, sgcn_summary, qgcn_summary = (
    {**describe(max_stats[model_key], "maxs"), **describe(smoothened_stats[model_key], "smaxs")}
    for model_key in ("cnn", "sgcn", "qgcn")
  )
  return cnn_summary, sgcn_summary, qgcn_summary


"""
SWEEPING Logic below
Loops through the different sweep parameters to train different models
"""
# Sweep driver: for every dataset split, dataset and learning rate, train the
# SGCN / QGCN models `num_runs` times (no CNN is built for these datasets),
# summarise the collected statistics and write them to a per-dataset YAML file
# after each learning rate.
for dataset_split in datasets: # loop over dataset splits
    # batch size is specific to the dataset split (falls back to 64 when absent)
    batch_size = dataset_split.get('batch_size', 64)
    num_train, num_test = dataset_split["train"], dataset_split["test"]

    # Inner loop goes over all datasets
    for selected_dataset, selected_dataset_config in dataset_mapping.items():
        # Prep experiment name and the path of the per-dataset YAML summary
        experiment_name = f"BATCH-RESULTS-ALL-DATASETS-{selected_dataset.capitalize()}_Summary"
        experiments_dir = os.path.join(os.path.dirname(os.path.realpath('__file__')), "Experiments")
        # Create the output directory up front: the summary write at the end of
        # each learning rate would fail if nothing else has created it by then.
        os.makedirs(experiments_dir, exist_ok=True)
        experiment_result_filepath = os.path.join(experiments_dir, f'{"_".join(experiment_name.split(" "))}.yaml')
        averaging_window_width = 0.05 # fraction -> 5%

        # Start from previously saved results (if any); entries for the keys
        # computed below are overwritten, all other entries are kept.
        results = {} # to hold results for saving
        if os.path.exists(experiment_result_filepath):
            with open(experiment_result_filepath, "r") as file_stream:
                # an empty file loads as None -> fall back to an empty dict
                results = dict(yaml.safe_load(file_stream) or {})

        # Load the required params for this dataset
        dataset_name           = selected_dataset_config["dataset_name"]
        self_loops_included    = selected_dataset_config["self_loops_included"]
        layers_num             = selected_dataset_config["layers_num"]
        model_dim              = selected_dataset_config["model_dim"]
        out_channels_1         = selected_dataset_config["out_channels_1"]
        dim_coor               = selected_dataset_config["dim_coor"]
        label_dim              = selected_dataset_config["label_dim"]
        out_dim                = selected_dataset_config["out_dim"]
        in_channels            = selected_dataset_config["in_channels"]
        hidden_channels        = selected_dataset_config["hidden_channels"]
        out_channels           = selected_dataset_config["out_channels"]
        cnn_kernel_size        = selected_dataset_config["cnn_kernel_size"]
        cnn_stride             = selected_dataset_config["cnn_stride"]
        cnn_padding            = selected_dataset_config["cnn_padding"]
        qgcn_num_subkernels    = selected_dataset_config["qgcn_num_subkernels"]
        is_dataset_homogenous  = selected_dataset_config["is_dataset_homogenous"]

        # Inner-Inner loop
        for i, lr in enumerate(lrs): # loop over learning rates
            optim_params = { "lr": lr }
            # epochs / runs are index-aligned with lrs
            num_epochs = epochs[i]
            num_runs = runs[i]
            # number of trailing epochs averaged for the "smoothened" statistics
            num_averaging_window = int(math.ceil(averaging_window_width * num_epochs))
            # create the key for hashing into results
            results_hash_key = f'train_{num_train}_test_{num_test}_lr_{lr}'
            results[results_hash_key] = {}
            # per-run stats, one list per model
            mean_train_loss = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_train_loss = { "cnn": [], "sgcn": [], "qgcn": []}
            max_train_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_train_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            max_test_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            smoothened_test_acc = { "cnn": [], "sgcn": [], "qgcn": []}
            for run in range(num_runs): # loop over num runs
                # initialize fresh models for every run
                sgcn_model = SGCN(dim_coor=dim_coor,
                                  out_dim=out_dim,
                                  input_features=in_channels, # label_dim,
                                  layers_num=layers_num,
                                  model_dim=model_dim,
                                  out_channels_1=out_channels_1)

                # no CNN is built for the custom datasets; "cnn" stats are still
                # read from the experiment below
                # NOTE(review): presumably placeholder values - confirm against Experiment
                cnn_model = None

                # convolutional graph neural networks
                qgcn_model = QGCN(dim_coor=dim_coor,
                                  out_dim=out_dim,
                                  in_channels=in_channels,
                                  hidden_channels=hidden_channels,
                                  out_channels=out_channels,
                                  layers_num=layers_num,
                                  num_kernels=qgcn_num_subkernels,
                                  self_loops_included=self_loops_included,
                                  is_dataset_homogenous=is_dataset_homogenous, # determines whether to apply caching for kernel masks
                                  apply_spatial_scalars=False, # SGCN-like behavior; refer to code and paper for more details
                                  initializer_model=cnn_model, # None here, since cnn_model is None
                                  device=device)

                # setup experiments to run
                experiment_id = f'_{run}_full_blown_exp_train_{num_train}_test_{num_test}_i_{i}_lr_{lr}_num_epochs_{num_epochs}'
                experiment = Experiment(sgcn_model = sgcn_model,
                                        qgcn_model = qgcn_model,
                                        cnn_model = cnn_model,
                                        optim_params = optim_params,
                                        base_path = "./",
                                        num_train = num_train,
                                        num_test = num_test,
                                        dataset_name = dataset_name,
                                        train_batch_size=batch_size,
                                        test_batch_size=batch_size,
                                        train_shuffle_data=True,
                                        test_shuffle_data=False,
                                        id = experiment_id) # mark this experiment ...

                # run the experiment ...
                experiment.run(num_epochs=num_epochs, eval_training_set=False) # specify num epochs ...

                # load collected stats during runs ...
                # (each triple is unpacked in cnn, qgcn, sgcn order)
                (train_cnn_loss_array, train_qgcn_loss_array, train_sgcn_loss_array, \
                 train_cnn_acc_array, train_qgcn_acc_array, train_sgcn_acc_array, \
                 test_cnn_acc_array, test_qgcn_acc_array, test_sgcn_acc_array) = experiment.load_cached_results() # only accuracies on train and test sets ...

                # group the per-epoch arrays by model so all models share one code path
                train_loss_arrays = {"cnn": train_cnn_loss_array, "sgcn": train_sgcn_loss_array, "qgcn": train_qgcn_loss_array}
                train_acc_arrays  = {"cnn": train_cnn_acc_array,  "sgcn": train_sgcn_acc_array,  "qgcn": train_qgcn_acc_array}
                test_acc_arrays   = {"cnn": test_cnn_acc_array,   "sgcn": test_sgcn_acc_array,   "qgcn": test_qgcn_acc_array}

                for model_key in ("cnn", "sgcn", "qgcn"):
                    model_train_loss = train_loss_arrays[model_key]
                    model_train_acc = train_acc_arrays[model_key]
                    model_test_acc = test_acc_arrays[model_key]

                    # whole-run stats: mean loss and best accuracies
                    mean_train_loss[model_key].append(statistics.mean(model_train_loss))
                    max_train_acc[model_key].append(max(model_train_acc))
                    max_test_acc[model_key].append(max(model_test_acc))

                    # smoothened stats: mean over the last `num_averaging_window` epochs
                    smoothened_train_loss[model_key].append(statistics.mean(model_train_loss[-num_averaging_window:]))
                    smoothened_train_acc[model_key].append(statistics.mean(model_train_acc[-num_averaging_window:]))
                    smoothened_test_acc[model_key].append(statistics.mean(model_test_acc[-num_averaging_window:]))

            # get collated stats
            train_loss_cnn_results, train_loss_sgcn_results, train_loss_qgcn_results = collate_stats("train_loss", mean_train_loss, smoothened_train_loss)
            train_acc_cnn_results,  train_acc_sgcn_results,  train_acc_qgcn_results  = collate_stats("train_acc", max_train_acc, smoothened_train_acc)
            test_acc_cnn_results,   test_acc_sgcn_results,   test_acc_qgcn_results   = collate_stats("test_acc", max_test_acc, smoothened_test_acc)
            all_cnn_stats  = {**train_loss_cnn_results,  **train_acc_cnn_results,  **test_acc_cnn_results}
            all_sgcn_stats = {**train_loss_sgcn_results, **train_acc_sgcn_results, **test_acc_sgcn_results}
            all_qgcn_stats = {**train_loss_qgcn_results, **train_acc_qgcn_results, **test_acc_qgcn_results}

            # save results into results obj
            results[results_hash_key]["cnn"] = all_cnn_stats
            results[results_hash_key]["sgcn"] = all_sgcn_stats
            results[results_hash_key]["qgcn"] = all_qgcn_stats

            # write the results to the YAML summary after every learning rate,
            # so results of a partially completed sweep are kept on disk
            with open(experiment_result_filepath, "w") as file_stream:
                yaml.dump(results, file_stream)
            

### HELPER FUNCTIONS

In [None]:
import matplotlib.pyplot as plt

"""
Plots weight distribution histogram
- Usecase: confirming equivalence between CNN and QGCN param inits
"""
def plot_weight_histogram(w_tensor):
    """Show a 10-bin, density-normalised histogram of all values in `w_tensor`.

    The tensor is moved to the CPU, detached, copied and flattened before
    plotting, so the caller's tensor is left untouched. The figure is
    displayed with `plt.show()`; nothing is returned.
    """
    flat_weights = w_tensor.cpu().detach().clone().numpy().flatten()
    bar_colors = ['black']

    figure, axis = plt.subplots(nrows=1, ncols=1)
    # the colour list doubles as the legend label
    axis.hist(flat_weights, 10, density=True, histtype='bar', color=bar_colors, label=bar_colors)
    axis.legend(prop={'size': 10})
    axis.set_title('bars with legend')

    figure.tight_layout()
    plt.show()

In [None]:
# extract model params from all three models:
def print_model_params(model):
    """Print the name of every parameter of `model` and plot its weight histogram.

    `model` is expected to expose `named_parameters()` yielding (name, tensor)
    pairs. A `None` model is reported and skipped instead of raising
    AttributeError; the custom-datasets cell leaves `cnn_model = None`.
    """
    if model is None:
        print("model is None - no parameters to show")
        print()
        return
    for name, param in model.named_parameters():
        print(name)
        plot_weight_histogram(param)
    # blank line separates the output of consecutive models
    print()
# Inspect the models left in the notebook namespace by the most recently
# executed sweep cell, in CNN, QGCN, SGCN order.
for _model in (cnn_model, qgcn_model, sgcn_model):
    print_model_params(_model)