In [1]:
import numpy as np
import time
import dgl
import torch
from torch import nn
import networkx as nx
from tqdm.auto import tqdm

import src.dag_utils as dagu
import src.utils as utils
from src.arch import FB_DAGConv, ParallelMLPSum, SharedMLPSum
from src.models import Model, LinDAGRegModel
from src.baselines_archs import GAT, MLP, MyGCNN, GraphSAGE, GIN

import os

# Ser random seed
SEED = 10
PATH = 'results/diffusion/'
SAVE = False
np.random.seed(SEED)
torch.manual_seed(SEED)
dgl.random.seed(SEED)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
# torch.cuda.set_per_process_memory_fraction(.5, device=device)

cuda:0


In [2]:
M = 2000

data_p = {
    'n_tries': 1,  #25,

    ## Graph parameters
    'p': 0.2,  # .2                  # Edge prob in Erdos-Renyi DAG
    'N': 100,                    # Number of nodes

    ## Signal parameters
    'M': M,                   # Number of observed signals
    'M_train': int(0.7 * M),  # Samples selected for training
    'M_val': int(0.2 * M),    # Samples selected for validation
    'M_test': int(0.1 * M),   # Samples selected for test
    'src_t': 'constant',          # 'random' or 'constant'
    'max_src_node': 25, #25,           # Maximum index of nodes allowed to be sources
    'n_sources': 5,             # Maximum Number of source nodes
    'n_p_x': .05,
    'n_p_y': .05,                 # Normalized noise power
    'max_GSO': 100,              # Maximum index of GSOs involved in the diffusion
    'min_GSO': 50,               # Minimum index of GSOs involved in the diffusion
    'n_GSOs': 25                 # Number of GSOs
}

# Model parameters
default_arch_args = {
    'in_dim': 1,        # Input dimension
    'hid_dim': 32,     # Hidden dimension
    'out_dim': 1,       # Output dimension
    'n_layers': 2,#2,  # 3 also works well          # Number of layers
    'l_act': None,
    'bias': True,
}

default_mod_p = {
    'bs': 25,           # Size of the batch
    'lr': 5e-4,         # Learning rate
    'epochs': 50,  #50,       # Number of training epochs 
    'pat': 25,  # 15        # Number of non-decreasing epoch to stop training
    'wd': 1e-4,         # Weight decay
}

In [3]:
def get_signals(d_p, GSOs):
    range_GSO = np.arange(d_p['min_GSO'], d_p['max_GSO'])
    gsos_idx = np.random.choice(range_GSO, size=d_p['n_GSOs'], replace=False)
    sel_GSOs = GSOs[gsos_idx]
    Yn_t, X_t, Y_t = dagu.create_diff_data(d_p['M'], sel_GSOs, d_p['max_src_node'], d_p['n_p_x'], d_p['n_p_y'],
                                           d_p['n_sources'], src_t=d_p['src_t'], torch_tensor=True, verb=False)
    
    X_data = {'train': X_t[:d_p['M_train']], 'val': X_t[d_p['M_train']:-d_p['M_test']], 'test': X_t[-d_p['M_test']:]}
    Y_data = {'train': Yn_t[:d_p['M_train']], 'val': Yn_t[d_p['M_train']:-d_p['M_test']],
              'test': Y_t[-d_p['M_test']:]}
        
    return X_data, Y_data, sel_GSOs, gsos_idx

def run_exp(d_p, d_arc_args, d_mod_p, exps, verb=True):
    # Create error variables
    err = np.zeros((d_p['n_tries'], len(exps)))
    std = np.zeros((d_p['n_tries'], len(exps)))
    times = np.zeros((d_p['n_tries'], len(exps)))

    t_begin = time.time()
    # for i in range(d_p['n_tries']):
    with tqdm(total=d_p['n_tries']*len(exps), disable=False) as pbar:
        for i in range(d_p['n_tries']):
            Adj, W, GSOs, Psi = utils.get_graph_data(d_p, get_Psi=True)
            X_data, Y_data, sel_GSOs, sel_GSOs_idx = get_signals(d_p, GSOs)
            
            for j, exp in enumerate(exps):
                # Combine default and experiment parameters    
                arc_p = {**exp['arc_p']}
                arc_p['args'] = {**d_arc_args, **arc_p['args']} if 'args' in arc_p.keys() else {**d_arc_args}
                mod_p = {**d_mod_p, **exp['mod_p']} if 'mod_p' in exp.keys() else d_mod_p

                # Fit and test nonlinear models
                GSO = utils.select_GSO(arc_p, GSOs, sel_GSOs, W, Adj)
                K = GSO.shape[0] if isinstance(GSO, torch.Tensor) and len(GSO.shape) == 3 else 0  
                arch = utils.instantiate_arch(arc_p, K)                
                model = Model(arch, device=device)


                params = arch.n_params if hasattr(arch, 'n_params') else None 
                print(f'-{i}. {exp["leg"]}: n_params: {params}')

                continue 


                t_i = time.time()
                model.fit(X_data, Y_data, GSO, mod_p['lr'], mod_p['epochs'], mod_p['bs'], mod_p['wd'], patience=mod_p['pat'])
                t_e = time.time() - t_i
                err[i,j], std[i,j] = model.test(X_data['test'], Y_data['test'], GSO, exp['arc_p']['arch'])


            # times[i,j] = t_e

            params = arch.n_params if hasattr(arch, 'n_params') else None 
                
            # Progress
            pbar.update(1)
            if verb:
                print(f'-{i}. {exp["leg"]}: err: {err[i,j]:.3f} | std: {std[i,j]:.3f}  |' +
                      f' time: {times[i,j]:.1f} | n_params: {params}')

    total_t = (time.time() - t_begin)/60
    print(f'----- Ellapsed time: {total_t:.2f} minutes -----')
    return err, std, times

In [5]:
mod_p_init = default_mod_p.copy()
mod_p_init['pat'] = 50

verb = True

# Experiments to be run
Exps = [
    # Our Models
    {'arc_p': {'arch': FB_DAGConv, 'GSO': 'rnd_GSOs', 'n_gsos': 5}, 'leg': 'DCN-5'},
    
    {'arc_p': {'arch': ParallelMLPSum, 'input_dim': 1, 'hidden_dims': [32]*2, 'output_dim': 1, 'GSO': 'rnd_GSOs', 'n_inputs': 5, 'n_gsos': 5}, 'leg': 'PDCN-5'},

    {'arc_p': {'arch': SharedMLPSum, 'input_dim': 1, 'hidden_dims': [32]*2, 'output_dim': 1, 'GSO': 'rnd_GSOs', 'n_inputs': 5, 'n_gsos': 5}, 'leg': 'SDCN-5'},

    {'arc_p': {'arch': FB_DAGConv, 'GSO': 'rnd_GSOs', 'n_gsos': 5}, 'leg': 'DCN-5'},

    {'arc_p': {'arch': SharedMLPSum, 'input_dim': 1, 'hidden_dims': [32]*2, 'output_dim': 1, 'GSO': 'rnd_GSOs', 'n_inputs': 20, 'n_gsos': 20}, 'leg': 'SDCN-20'},

    {'arc_p': {'arch': ParallelMLPSum, 'input_dim': 1, 'hidden_dims': [32]*2, 'output_dim': 1, 'GSO': 'rnd_GSOs', 'n_inputs': 20, 'n_gsos': 20}, 'leg': 'PDCN-20'},

]

err, std, times = run_exp(data_p, default_arch_args, mod_p_init, Exps, verb=verb)

  0%|          | 0/6 [00:00<?, ?it/s]

-0. PDCN-20: err: 0.000 | std: 0.000  | time: 0.0 | n_params: 23060
----- Ellapsed time: 0.00 minutes -----
