Test LGANM

In [1]:
import sempler
import numpy as np


# Weighted connectivity matrix of a 5-variable graph: W[i, j] is the weight
# of the edge i -> j. Only four entries are non-zero, so they are filled in
# on top of an all-zero (float) matrix.
W = np.zeros((5, 5))
W[[0, 1, 2, 3], [3, 2, 3, 4]] = [0.1, 2.1, 3.2, 5.0]

# Build the linear Gaussian model.
# NOTE(review): the (0, 1) tuples are presumably ranges from which the noise
# means and variances are drawn — confirm against the sempler docs.
lganm = sempler.LGANM(W, means=(0, 1), variances=(0, 1))

# Draw 100 observations from the observational setting
samples = lganm.sample(100)

# Draw 100 observations under a shift intervention on variable 1 with
# standard gaussian noise (this rebinds `samples`)
samples = lganm.sample(
    100,
    shift_interventions={1: (0, 1)},
)

# Observational environment in the "population setting": no sample size is
# given, and the result is a distribution object rather than a sample
distribution = lganm.sample(population=True)



In [12]:
# Show the object returned by lganm.sample(population=True); the printed
# output lists a mean vector and a covariance matrix.
print(distribution)

mean:
[ 0.07063424  0.82076854  1.85522887  6.43567917 32.58087994]
covariance:
[[1.37192846e-01 0.00000000e+00 0.00000000e+00 1.37192846e-02
  6.85964232e-02]
 [0.00000000e+00 2.28100741e-01 4.79011557e-01 1.53283698e+00
  7.66418491e+00]
 [0.00000000e+00 4.79011557e-01 1.92827632e+00 6.17048422e+00
  3.08524211e+01]
 [1.37192846e-02 1.53283698e+00 6.17048422e+00 2.00654081e+01
  1.00327040e+02]
 [6.85964232e-02 7.66418491e+00 3.08524211e+01 1.00327040e+02
  5.01839834e+02]]


Test ANM


In [2]:
import sempler
import sempler.noise as noise
import numpy as np

# Connectivity matrix of the graph: A[i, j] = 1 marks an edge i -> j
# (edges 0 -> 3, 1 -> 2, 2 -> 3 and 3 -> 4), everything else is 0.
A = np.zeros((5, 5), dtype=int)
A[[0, 1, 2, 3], [3, 2, 3, 4]] = 1

# Noise distributions (see sempler.noise): one noise.normal(0, 1) object,
# shared by all five variables
shared_noise = noise.normal(0, 1)
noise_distributions = [shared_noise] * 5


def _assign_var3(parents):
    """Assignment of variable 3: exp of its first parent column plus twice the second."""
    return np.exp(parents[:, 0]) + 2 * parents[:, 1]


def _assign_var4(parent):
    """Assignment of variable 4: twice its parent."""
    return 2 * parent


# Variable assignments, one entry per variable; variables 0 and 1 have no
# incoming edges and get None
functions = [None, None, np.sin, _assign_var3, _assign_var4]

# Put graph, assignments and noise together
anm = sempler.ANM(A, functions, noise_distributions)

# 100 observations from the observational setting
samples1 = anm.sample(100)

# 100 observations under a noise intervention on variable 1
samples2 = anm.sample(
    100,
    noise_interventions={1: noise.normal(0, 1)},
)

# 100 observations under a noise intervention on variable 0 combined with a
# do intervention on variable 2
samples3 = anm.sample(
    100,
    noise_interventions={0: noise.normal()},
    do_interventions={2: noise.uniform()},
)

In [21]:
# Average the observational ANM sample over axis 0, giving one mean per variable
print(np.mean(samples1, axis=0))

[0.1414424  0.04006606 0.1214319  2.49015961 5.01002124]


In [41]:
# Three-variable chain 0 -> 1 -> 2 (B[i, j] = 1 marks an edge i -> j)
B = np.zeros((3, 3), dtype=int)
B[[0, 1], [1, 2]] = 1

# One shared noise.normal(0, 1) object for all three variables.
# Note: this cell rebinds noise_distributions, functions, anm and samples1.
noise_distributions = [noise.normal(0, 1)] * 3


def _shift_then_triple(x):
    """Assignment of variable 1: (parent + 1) * 3."""
    return (x + 1) * 3


def _double(x):
    """Assignment of variable 2: parent * 2."""
    return x * 2


functions = [None, _shift_then_triple, _double]

anm = sempler.ANM(B, functions, noise_distributions)
samples1 = anm.sample(100)

# Per-variable sample means of the 100 observations
print(np.mean(samples1, axis=0))

[-4.66304528e-03  2.86553921e+00  5.61652794e+00]


In [3]:
from sempler.generators import dag_avg_deg, intervention_targets

# Generate a random weighted graph; the displayed result is a 5x5 weight matrix.
# NOTE(review): presumably p is the number of nodes, k the average degree and
# [w_min, w_max] the range of the edge weights — confirm against the sempler docs.
# No random_state is passed here, so re-running the cell may give a different graph.
dag_avg_deg(p=5, k=2, w_min=0.1, w_max=5.0)

array([[0.        , 0.        , 0.51285762, 0.        , 0.        ],
       [0.        , 0.        , 4.54277831, 0.        , 0.39388172],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 3.36973466, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ]])

Data Generation for av ICP

In [10]:
import pandas as pd

# Simulation settings
outer = 300  # number of datasets to generate (one random DAG per dataset)
inner = 300  # number of samples drawn per environment
p = 6        # number of variables in each DAG
k = 2        # `k` argument of dag_avg_deg (average degree of the generated DAG)
w_min = 0.5  # lower bound passed to dag_avg_deg for the edge weights
w_max = 5    # upper bound passed to dag_avg_deg for the edge weights
K = 5        # Number of interventions

# Define means and variances for the noise distributions
means = np.array([-2, -1, 0, 1, 2, 3])  # Means for the noise distributions
variances = np.ones(p)
assert len(means) == p, "means must contain exactly one entry per variable"

for i in range(outer):
    # The DAG is seeded with the dataset index, so dataset i always uses the
    # same graph.
    dag = dag_avg_deg(p=p, k=k, w_min=w_min, w_max=w_max, random_state=i)

    # The model depends only on the DAG and the fixed noise parameters, so it
    # is built once and reused for every environment of this dataset.
    model = sempler.LGANM(dag, means=means, variances=variances)

    # Observational environment, labelled 0 in the extra last column
    env = np.zeros(inner, dtype=int)
    env_blocks = [np.hstack([model.sample(inner), env.reshape(-1, 1)])]

    int_target = np.array(
        intervention_targets(p - 1, K, size=1, replace=False, random_state=i)
    ).flatten()
    int_target = int_target + 1  # covariate 0 is not intervened on, so we shift the indices by 1

    # The loop index must NOT be called `k`: that would overwrite the `k`
    # passed to dag_avg_deg above, so every dataset after the first would be
    # generated with k = K - 1 instead of the configured value.
    for env_idx in range(K):
        target = int_target[env_idx]

        # Each interventional environment is labelled with the index of the
        # variable that was intervened on
        env = np.full(inner, target, dtype=int)
        # shift intervention by mean = -10, variance = 0
        dat_tmp = model.sample(inner, shift_interventions={target: (-10, 0)})
        env_blocks.append(np.hstack([dat_tmp, env.reshape(-1, 1)]))

    # Stack all environments once: observational rows first, then one block
    # of rows per intervention, in the order the interventions were sampled
    dat = np.vstack(env_blocks)

    # Convert to DataFrame for easy saving
    df = pd.DataFrame(dat)

    # One data file and one DAG file per dataset
    filename = f"new_data_{i}.csv"
    dagname = f"new_dag_{i}.txt"

    # Save the samples as CSV and the weight matrix as comma-separated text
    df.to_csv(filename, index=False)
    np.savetxt(dagname, dag, delimiter=',', fmt='%.6f')
    


The next step is exporting the data to load in R
