### Import packages

In [18]:
import torch
from dagma import utils
from dagma.linear import DagmaLinear
from dagma.nonlinear import DagmaMLP, DagmaNonlinear
# Import pandas
import pandas as pd


In [41]:
# Read the Asia dataset from CSV into a DataFrame and preview the first rows
df = pd.read_csv("../data/asia.csv")
print(df.head())


   Unnamed: 0   A    S    T   L    B    E    X    D
0           1  no  yes   no  no  yes   no   no  yes
1           2  no  yes   no  no   no   no   no   no
2           3  no   no  yes  no   no  yes  yes  yes
3           4  no   no   no  no  yes   no   no  yes
4           5  no   no   no  no   no   no   no  yes


### Generate data

In [57]:

# Encode the categorical answers as integers: 'yes' -> 1, 'no' -> 0.
df = df.replace({'yes': 1, 'no': 0})
# Drop the leftover row-number column that the CSV exposes as 'Unnamed: 0'
# (it is not one of the variables A, S, T, L, B, E, X, D).
# The column is removed by name, and errors='ignore' makes the cell safe to
# re-run: a positional drop of df.columns[0] would delete the real variable
# 'A' on a second execution, because this cell reassigns `df`.
df = df.drop(columns='Unnamed: 0', errors='ignore')
print(df.head())



   A  S  T  L  B  E  X  D
1  0  1  0  0  0  0  0  0
2  0  0  1  0  0  1  1  1
3  0  0  0  0  1  0  0  1
4  0  0  0  0  0  0  0  1
5  0  1  0  0  0  0  0  1


In [58]:
# Convert the DataFrame to a NumPy array, the input passed to model.fit below
arr = df.to_numpy()
print(arr)

[[0 1 0 ... 0 0 0]
 [0 0 1 ... 1 1 1]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 [0 1 0 ... 0 0 1]]


In [59]:

model = DagmaLinear(loss_type='logistic') # create a linear model with logistic loss (the data was encoded as 0/1 above)
W_est = model.fit(arr, lambda1=0.02) # fit the model with L1 reg. (coeff. 0.02)


  0%|          | 0/180000.0 [00:00<?, ?it/s]

In [None]:
# Show the matrix returned by model.fit
print(W_est)

In [3]:
# Fix the seed so the simulated graph and samples are reproducible.
utils.set_random_seed(1)

# Problem size: an Erdos-Renyi DAG with 20 nodes and 20 expected edges,
# sampled 500 times with Gaussian noise.
n = 500    # number of samples
d = 20     # number of nodes
s0 = 20    # expected number of edges
graph_type = 'ER'
sem_type = 'gauss'

# Ground-truth structure, its edge weights, and data drawn from the linear SEM.
B_true = utils.simulate_dag(d, s0, graph_type)
W_true = utils.simulate_parameter(B_true)
X = utils.simulate_linear_sem(W_true, n, sem_type)



  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


<_csv.reader object at 0x0000023CEF8DDE20>


### Test Linear Model

In [8]:
# Linear model with least-squares loss.
model = DagmaLinear(loss_type="l2")

# Fit with L1 regularisation (coefficient 0.02).
W_est = model.fit(X, lambda1=0.02)

# Score the non-zero pattern of W_est against the ground-truth graph.
acc = utils.count_accuracy(
    B_true,
    W_est != 0,
)
print(acc)

  0%|          | 0/180000.0 [00:00<?, ?it/s]

{'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 20}


### Test Nonlinear Model

In [9]:
# note: using torch.double instead of torch.float gives better result for larger num of nodes

# Structural equations are modelled with MLPs.
eq_model = DagmaMLP(
    dims=[d, 10, 1],
    bias=True,
    dtype=torch.double,
)

# DAG-learning model built on top of the equation model.
model = DagmaNonlinear(eq_model, dtype=torch.double)

# Fit with L1 regularisation (coefficient 0.02) and L2 regularisation (coefficient 0.005).
W_est = model.fit(
    X,
    lambda1=0.02,
    lambda2=0.005,
)

# Score the non-zero pattern of W_est against the ground-truth graph.
acc = utils.count_accuracy(
    B_true,
    W_est != 0,
)
print(acc)

  0%|          | 0/230000.0 [00:00<?, ?it/s]

{'fdr': 0.0, 'tpr': 1.0, 'fpr': 0.0, 'shd': 0, 'nnz': 20}
