In [1]:
import networkx as nx
import numpy as np
import torch
import utils
from model import CFCN
from utils.inference import CausalInference, find_element_in_list
import pandas as pd
import matplotlib.pyplot as plt
from datasets import reorder_dag, get_full_ordering

In [2]:

shuffling = 0
seed = 1
standardize = 0
sample_size = 100000
batch_size = 50
max_iters =  30000
eval_interval = 100
eval_iters = 100
validation_fraction = 0.1
np.random.seed(seed=seed)
torch.manual_seed(seed)
device = 'cuda'
dropout_rate = 0.0
learning_rate = 1e-3

neurons_per_layer = [4,8,4]


def generate_data(N):
    Ux = np.random.randn(N)
    X = Ux
    Ub = 0.01 * np.random.randn(N)
    B = Ub
    Uc = 0.01 * np.random.randn(N)
    C = Uc
    Uy = 0.01 * np.random.randn(N)
    Y = 0.5 * X + 0.5 * C + 0.4* B + Uy

    Y0 = 0.5 * C + 0.4* B + Uy
    Y1 = 0.5 + 0.5 * C + 0.4* B + Uy

    all_data_dict = {'X': X, 'B': B, 'C': C, 'Y': Y}

    # types can be 'cat' (categorical) 'cont' (continuous) or 'bin' (binary)
    var_types = {'X': 'cont', 'B': 'cont', 'C': 'cont', 'Y': 'cont'}

    DAGnx = nx.DiGraph()
    DAGnx.add_edges_from([('X', 'Y'), ('B', 'Y'), ('C', 'Y')])
    DAGnx = reorder_dag(dag=DAGnx)  # topologically sorted dag
    causal_ordering = get_full_ordering(DAGnx)
    var_names = list(DAGnx.nodes())  # topologically ordered list of variables
    all_data = np.stack([all_data_dict[key] for key in var_names], axis=1)

    return all_data, DAGnx, var_names, causal_ordering, var_types, Y0, Y1


## Exogenous Example

In [3]:
_, _, _, _, _, Y0, Y1 = generate_data(N=1000000)
ATE = (Y1 - Y0).mean()  # ATE based off a large sample
all_data, DAG, var_names, causal_ordering, var_types_sorted, Y0, Y1 = generate_data(N=sample_size)
print(var_names, ATE)

input_dim = all_data.shape[1]

# prepend the input size to neurons_per_layer if not included in neurons_per_layer
# append the intput size to neurons_per_layer (output) if not included in neurons_per_layer
neurons_per_layer.insert(0, input_dim)
neurons_per_layer.append(input_dim)
utils.assert_neuron_layers(layers=neurons_per_layer, input_size=input_dim)

indices = np.arange(0, len(all_data))
np.random.shuffle(indices)

val_inds = indices[:int(validation_fraction*len(indices))]
train_inds = indices[int(validation_fraction*len(indices)):]
train_data = all_data[train_inds]
val_data = all_data[val_inds]

train_data, val_data = torch.from_numpy(train_data).float(),  torch.from_numpy(val_data).float()

['X', 'B', 'C', 'Y'] 0.5


In [4]:
initial_adj_matrix = nx.to_numpy_array(DAG)

initial_masks = [torch.from_numpy(mask).float().to(torch.float64) for mask in
                 utils.expand_adjacency_matrix(neurons_per_layer[1:], initial_adj_matrix)]


model = CFCN(neurons_per_layer=neurons_per_layer, dag=DAG, causal_ordering=causal_ordering, var_types_sorted=var_types_sorted, dropout_rate=dropout_rate).to(device)
model.initialize_masks(initial_masks)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

TypeError: __init__() got an unexpected keyword argument 'var_types_sorted'

In [None]:
 def get_batch(train_data, val_data, split, device, batch_size):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(0, len(data), (batch_size,))
    x = data[ix]
    return x.to(device)

all_var_losses = {}
for iter_ in range(0, max_iters):
    # train and update the model
    model.train()

    xb = get_batch(train_data=train_data, val_data=val_data, split='train', device=device, batch_size=batch_size)
    xb_mod = torch.clone(xb.detach())
    X, loss, loss_dict = model(X=xb, targets=xb_mod, shuffling=shuffling)

    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()


    if iter_ % eval_interval == 0:  # evaluate the loss (no gradients)
        for key in loss_dict.keys():
            if key not in all_var_losses.keys():
                all_var_losses[key] = []
            all_var_losses[key].append(loss_dict[key])

        model.eval()
        eval_loss = {}
        for split in ['train', 'val']:
            losses = torch.zeros(eval_iters)
            for k in range(eval_iters):

                xb = get_batch(train_data=train_data, val_data=val_data, split=split, device=device,
                               batch_size=batch_size)
                xb_mod = torch.clone(xb.detach())
                X, loss, loss_dict = model(X=xb, targets=xb_mod, shuffling=False)
                losses[k] = loss.item()
            eval_loss[split] = losses.mean()
        print(f"step {iter_} of {max_iters}: train_loss {eval_loss['train']:.4f}, val loss {eval_loss['val']:.4f}")

In [None]:

df = pd.DataFrame(all_data, columns=var_names)
data_dict = df.to_dict(orient='list')
cause_var = 'X'
effect_var = 'Y'
effect_index = var_names.index(effect_var)

ci = CausalInference(model=model, device=device)


In [None]:
model.eval()
intervention_nodes_vals_0 = {'X': 0}
intervention_nodes_vals_1 = {'X': 1}
D0 = ci.forward(data=all_data , intervention_nodes_vals=intervention_nodes_vals_0)
D1 = ci.forward(data=all_data , intervention_nodes_vals=intervention_nodes_vals_1)

effect_var = 'Y'
effect_index = find_element_in_list(var_names, target_string=effect_var)

est_ATE = (D1[:,effect_index] - D0[:,effect_index]).mean()
print(ATE, est_ATE)