In [1]:
# Import all the necessary modules
%load_ext autoreload
%autoreload 2
from dagrad import dagrad # dagrad is the main class for learning the structure of a DAG
from dagrad import generate_linear_data, generate_nonlinear_data, count_accuracy, threshold_till_dag
import torch
import numpy as np
import matplotlib.pyplot as plt 

No GPU automatically detected. Setting SETTINGS.GPU to 0, and SETTINGS.NJOBS to cpu_count.


In [2]:
def postprocess(B, graph_thres=0.3):
    """Clean up an estimated weight matrix so that it encodes a DAG.

    Works on a copy, so the caller's matrix is never modified. Two steps:
        (1) Zero every entry whose absolute value is at or below
            ``graph_thres``.
        (2) Hand the result to ``threshold_till_dag``, which removes the
            edges with smallest absolute weight until a DAG is obtained;
            only its first return value is kept.

    Args:
        B (numpy.ndarray): [d, d] weighted matrix.
        graph_thres (float): Cut-off on absolute edge weight. Default: 0.3.

    Returns:
        numpy.ndarray: [d, d] weighted matrix of DAG.
    """
    pruned = np.copy(B)

    # Step 1: hard-threshold weak edges (note the inclusive comparison).
    weak_edges = np.abs(pruned) <= graph_thres
    pruned[weak_edges] = 0

    # Step 2: keep pruning until acyclic; the second return value is unused.
    dag_weights, _ = threshold_till_dag(pruned)
    return dag_weights

### Linear SEM with EV noise - EV method

In [3]:
def golem_ev(n, d, s0, graph_type, noise_type, error_var, seed=None):
    """Single-stage equal-variance fit on synthetic linear data, with scoring.

    Pipeline: draw data with ``generate_linear_data``, estimate the weight
    matrix with ``dagrad`` (``method='dagma'``, ``equal_variances=True``),
    clean the estimate with ``postprocess`` and compare its support with the
    true graph via ``count_accuracy``. A short run summary and the metrics
    are printed along the way.

    NOTE(review): the ``'user_loss'`` / ``'user_reg'`` / ``'user_h'`` names
    are passed to ``dagrad`` as-is; where those callables are registered is
    not visible in this notebook.

    Args:
        n: Forwarded to the data generator; printed as "data size".
        d: Forwarded to the data generator; printed as "nodes".
        s0: Forwarded to the data generator; printed as "edges".
        graph_type: Forwarded to the data generator (e.g. ``'ER'``).
        noise_type: Forwarded to the data generator; printed as "sem type".
        error_var: Forwarded to the data generator (e.g. ``'eq'``).
        seed: Forwarded to the data generator. Default: None.

    Returns:
        The metrics returned by ``count_accuracy`` for the post-processed
        estimate.
    """
    # The true weight matrix (second return value) is not needed for scoring.
    samples, _, B_true = generate_linear_data(n, d, s0, graph_type, noise_type, error_var, seed)
    samples = torch.from_numpy(samples).float()

    user_params = {'equal_variances': True}
    W_golem = dagrad(
        samples,
        model='linear',
        method='dagma',
        compute_lib='torch',
        loss_fn='user_loss',
        reg='user_reg',
        h_fn='user_h',
        general_options={'user_params': user_params},
    )

    print("Linear Model")
    print(f"data size: {n}, graph type: {graph_type}, nodes: {d}, edges: {s0}, sem type: {noise_type}")

    # Score the support (non-zero pattern) of the thresholded, acyclic estimate.
    estimated_dag = postprocess(W_golem)
    acc_golem = count_accuracy(B_true, estimated_dag != 0)
    print('Accuracy of Golem:', acc_golem)

    return acc_golem

In [4]:
# EV fit: n=1000 samples, 100 nodes, 50 edges, ER graph, 'gauss' noise, error_var='eq'.
golem_ev(1000, 100, 50, 'ER', 'gauss', 'eq', seed=2)

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 50, sem type: gauss
Accuracy of Golem: {'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'sid': 0.0, 'nnz': 50}


{'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'sid': 0.0, 'nnz': 50}

In [5]:
# DAGMA baseline on an ER graph with 100 nodes and s0=50 edges; setup follows
# Sec. 5.1 of https://arxiv.org/pdf/2006.10201.
# NOTE(review): this cell was labelled "ER1". If ERk denotes k*d expected edges,
# d=100 with s0=50 is not ER1 - confirm the intended edge count.
n, d, s0, graph_type, noise_type = 1000, 100, 50, 'ER', 'gauss' # Define the parameters of the data
X, W_true, B_true = generate_linear_data(n,d,s0,graph_type,noise_type, error_var='eq',seed  =2) # Generate the data
X = torch.from_numpy(X).float()
model = 'linear' # Define the model
# No loss_fn / reg / h_fn overrides here, so dagrad runs with its own defaults.
W_dagma = dagrad(
    X,
    model = model,
    method = 'dagma',
    compute_lib='torch',
) # Learn the structure of the DAG using Dagma
print(f"Linear Model")
print(f"data size: {n}, graph type: {graph_type}, sem type: {noise_type}")

# The estimate is scored directly on its non-zero pattern; unlike the Golem
# runs in this notebook, no postprocess() step is applied first.
acc_dagma = count_accuracy(B_true, W_dagma != 0) # Measure the accuracy of the learned structure using Dagma
print('Accuracy of Dagma:', acc_dagma)


Linear Model
data size: 1000, graph type: ER, sem type: gauss
Accuracy of Dagma: {'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 50}


In [6]:
# EV fit on a denser graph: n=1000 samples, 100 nodes, 200 edges, ER graph, 'gauss' noise, error_var='eq'.
golem_ev(1000, 100, 200, 'ER', 'gauss', 'eq', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 200, sem type: gauss
Accuracy of Golem: {'fdr': 0.009950248756218905, 'tpr': 0.995, 'fpr': 0.0004210526315789474, 'shd': 2, 'nnz': 201}


{'fdr': 0.009950248756218905,
 'tpr': 0.995,
 'fpr': 0.0004210526315789474,
 'shd': 2,
 'nnz': 201}

In [7]:
# DAGMA baseline on an ER graph with 100 nodes and s0=200 edges; setup follows
# Sec. 5.1 of https://arxiv.org/pdf/2006.10201.
# NOTE(review): this cell was labelled "ER4". If ERk denotes k*d expected edges,
# d=100 with s0=200 is not ER4 - confirm the intended edge count.
n, d, s0, graph_type, noise_type = 1000, 100, 200, 'ER', 'gauss' # Define the parameters of the data
X, W_true, B_true = generate_linear_data(n,d,s0,graph_type,noise_type,error_var='eq',seed  =2) # Generate the data
X = torch.from_numpy(X).float()
model = 'linear' # Define the model
# No loss_fn / reg / h_fn overrides here, so dagrad runs with its own defaults.
W_dagma = dagrad(
    X,
    model = model,
    method = 'dagma',
    compute_lib='torch',
) # Learn the structure of the DAG using Dagma
print(f"Linear Model")
print(f"data size: {n}, graph type: {graph_type}, sem type: {noise_type}")

# The estimate is scored directly on its non-zero pattern; unlike the Golem
# runs in this notebook, no postprocess() step is applied first.
acc_dagma = count_accuracy(B_true, W_dagma != 0) # Measure the accuracy of the learned structure using Dagma
print('Accuracy of Dagma:', acc_dagma)


Linear Model
data size: 1000, graph type: ER, sem type: gauss
Accuracy of Dagma: {'fdr': 0.0, 'tpr': 0.985, 'fpr': 0.0, 'shd': 3, 'nnz': 197}


### Linear SEM with EV noise - Two-stage method

In [8]:
def golem_nv(n, d, s0, graph_type, sem_type, error_var, seed=None):
    """Two-stage fit (equal-variance, then non-equal-variance) with scoring.

    Stage 1 runs ``dagrad`` with ``equal_variances=True``. Stage 2 runs it
    again with ``equal_variances=False`` and the raw stage-1 estimate passed
    as ``'initialization'``. After each stage the estimate is cleaned with
    ``postprocess`` and its support is compared with the true graph via
    ``count_accuracy``; both sets of metrics are printed.

    NOTE(review): the ``'user_loss'`` / ``'user_reg'`` / ``'user_h'`` names
    are passed to ``dagrad`` as-is; where those callables are registered is
    not visible in this notebook.

    Args:
        n: Forwarded to the data generator; printed as "data size".
        d: Forwarded to the data generator; printed as "nodes".
        s0: Forwarded to the data generator; printed as "edges".
        graph_type: Forwarded to the data generator (e.g. ``'ER'``).
        sem_type: Forwarded to the data generator; printed as "sem type".
        error_var: Forwarded to the data generator (e.g. ``'eq'``, ``'random'``).
        seed: Forwarded to the data generator. Default: None.

    Returns:
        The metrics returned by ``count_accuracy`` for the post-processed
        stage-2 estimate. Stage-1 metrics are printed but not returned.
    """
    # The true weight matrix (second return value) is not needed for scoring.
    samples, _, B_true = generate_linear_data(n, d, s0, graph_type, sem_type, error_var, seed)
    samples = torch.from_numpy(samples).float()

    # Solver settings shared by both stages; only general_options differs.
    solver_kwargs = dict(
        model='linear',
        method='dagma',
        compute_lib='torch',
        loss_fn='user_loss',
        reg='user_reg',
        h_fn='user_h',
    )

    # ---- Stage 1: equal-variance fit ----
    ev_options = {'user_params': {'equal_variances': True}}
    W_ev = dagrad(samples, general_options=ev_options, **solver_kwargs)

    print("Linear Model")
    print(f"data size: {n}, graph type: {graph_type}, nodes: {d}, edges: {s0}, sem type: {sem_type}")

    acc_ev = count_accuracy(B_true, postprocess(W_ev) != 0)
    print('Accuracy of Golem after EV stage:', acc_ev)

    # ---- Stage 2: non-equal-variance fit ----
    # Initialised from the raw stage-1 estimate, not the post-processed one.
    nv_options = {
        'user_params': {'equal_variances': False},
        'initialization': W_ev,
    }
    W_nv = dagrad(samples, general_options=nv_options, **solver_kwargs)

    acc_nv = count_accuracy(B_true, postprocess(W_nv) != 0)
    print('Accuracy of Golem after NV stage:', acc_nv)

    return acc_nv


In [9]:
# Two-stage fit: n=1000 samples, 100 nodes, 200 edges, ER graph, 'gauss' noise, error_var='eq'.
golem_nv(1000, 100, 200, 'ER', 'gauss', 'eq', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 200, sem type: gauss
Accuracy of Golem after EV stage: {'fdr': 0.009950248756218905, 'tpr': 0.995, 'fpr': 0.0004210526315789474, 'shd': 2, 'nnz': 201}
Accuracy of Golem after NV stage: {'fdr': 0.009950248756218905, 'tpr': 0.995, 'fpr': 0.0004210526315789474, 'shd': 2, 'nnz': 201}


{'fdr': 0.009950248756218905,
 'tpr': 0.995,
 'fpr': 0.0004210526315789474,
 'shd': 2,
 'nnz': 201}

In [11]:
# Two-stage fit with 'exp' noise: n=1000 samples, 100 nodes, 50 edges, ER graph, error_var='eq'.
golem_nv(1000, 100, 50, 'ER', 'exp', 'eq', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 50, sem type: exp
Accuracy of Golem after EV stage: {'fdr': 0.09259259259259259, 'tpr': 0.98, 'fpr': 0.0010204081632653062, 'shd': 5, 'nnz': 54}
Accuracy of Golem after NV stage: {'fdr': 0.14035087719298245, 'tpr': 0.98, 'fpr': 0.0016326530612244899, 'shd': 8, 'nnz': 57}


{'fdr': 0.14035087719298245,
 'tpr': 0.98,
 'fpr': 0.0016326530612244899,
 'shd': 8,
 'nnz': 57}

In [12]:
# Two-stage fit with 'gauss' noise: n=1000 samples, 100 nodes, 50 edges, ER graph, error_var='eq'.
golem_nv(1000, 100, 50, 'ER', 'gauss', 'eq', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 50, sem type: gauss
Accuracy of Golem after EV stage: {'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 50}
Accuracy of Golem after NV stage: {'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 50}


{'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 50}

In [13]:
# Two-stage fit: n=1000 samples, 100 nodes, 200 edges, ER graph, 'gauss' noise, error_var='eq'.
# NOTE(review): same arguments as an earlier golem_nv call in this notebook - likely redundant.
golem_nv(1000, 100, 200, 'ER', 'gauss', 'eq', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 200, sem type: gauss
Accuracy of Golem after EV stage: {'fdr': 0.009950248756218905, 'tpr': 0.995, 'fpr': 0.0004210526315789474, 'shd': 2, 'nnz': 201}
Accuracy of Golem after NV stage: {'fdr': 0.009950248756218905, 'tpr': 0.995, 'fpr': 0.0004210526315789474, 'shd': 2, 'nnz': 201}


{'fdr': 0.009950248756218905,
 'tpr': 0.995,
 'fpr': 0.0004210526315789474,
 'shd': 2,
 'nnz': 201}

### Linear SEM with NV noise - Two-stage method

In [14]:
# Two-stage fit with error_var='random': n=1000 samples, 100 nodes, 50 edges, ER graph, 'gumbel' noise.
golem_nv(1000, 100, 50, 'ER', 'gumbel', 'random', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 100, edges: 50, sem type: gumbel
Accuracy of Golem after EV stage: {'fdr': 0.7972972972972973, 'tpr': 0.9, 'fpr': 0.03612244897959184, 'shd': 180, 'nnz': 222}
Accuracy of Golem after NV stage: {'fdr': 0.17543859649122806, 'tpr': 0.94, 'fpr': 0.0020408163265306124, 'shd': 10, 'nnz': 57}


{'fdr': 0.17543859649122806,
 'tpr': 0.94,
 'fpr': 0.0020408163265306124,
 'shd': 10,
 'nnz': 57}

In [15]:
# Smaller problem with error_var='random': n=1000 samples, 20 nodes, 10 edges, ER graph, 'gauss' noise.
golem_nv(1000, 20, 10, 'ER', 'gauss', 'random', seed=2)

Linear Model
data size: 1000, graph type: ER, nodes: 20, edges: 10, sem type: gauss
Accuracy of Golem after EV stage: {'fdr': 0.7674418604651163, 'tpr': 1.0, 'fpr': 0.18333333333333332, 'shd': 33, 'nnz': 43}
Accuracy of Golem after NV stage: {'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 10}


{'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 10}