## Importing libraries

In [1]:
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd

## Loading data

In [2]:
# Structure-by-structure 0/1 table read from disk; the first CSV column supplies
# the row labels and the first CSV row supplies the column labels.
subset = pd.read_csv("subset.csv", index_col=0, header=0)
subset

Unnamed: 0,Everything,Telencephalon_L_501_5,CerebralCortex_L_482_4,Limbic_L_434_3,Hippo_L_338_2,Amyg_L_336_2,Hippo_L_75_1,Amyg_L_73_1
Everything,0,1,0,0,0,0,0,0
Telencephalon_L_501_5,0,0,1,0,0,0,0,0
CerebralCortex_L_482_4,0,0,0,1,0,0,0,0
Limbic_L_434_3,0,0,0,0,1,1,0,0
Hippo_L_338_2,0,0,0,0,0,0,1,0
Amyg_L_336_2,0,0,0,0,0,0,0,1
Hippo_L_75_1,0,0,0,0,0,0,0,0
Amyg_L_73_1,0,0,0,0,0,0,0,0


In [3]:
# Quantities shared by every experiment below.
names_subset = subset.columns        # Labels of the structures (column order of the table)
S = subset.to_numpy(dtype=bool)      # Boolean copy of the 0/1 table
M = len(subset)                      # Number of total unique structures (rows of the table)
nperm = 1000                         # Number of permutations when permutation testing
n_repeats = 100                      # Number of repeats per case per experiment

## Defining functions

In [4]:
# Define the function for phi (density, not CDF, of a unit-variance normal distribution)
def phi(x,mu=0.0):
    '''Unit-variance Gaussian probability density centred at mu, evaluated at x.

    Works elementwise on NumPy arrays as well as on scalars.
    '''
    deviation = x - mu
    normaliser = np.sqrt(2.0*np.pi)
    return 1.0/normaliser*np.exp(-deviation**2/2.0)

In [5]:
# Define the function for calculating P (probabilities of Z) from Q (conditional probability of Z given parent)
def P_from_Q(Q,Ancestors_and_self):
    '''Multiply the conditional probabilities Q along each structure's ancestor chain.

    Parameters
    ----------
    Q : array of length M
        One value per structure.
    Ancestors_and_self : (M, M) boolean array
        Row i selects the entries of Q that are multiplied together to give P[i].

    Returns
    -------
    P : array shaped and typed like Q, with P[i] = prod(Q[Ancestors_and_self[i, :]]).

    The number of structures is taken from Q itself rather than from a
    notebook-level variable, so the function does not depend on kernel state.
    It is not called anywhere else in the visible part of this notebook.
    '''
    Q = np.asarray(Q)
    Ancestors_and_self = np.asarray(Ancestors_and_self, dtype=bool)
    P = np.empty_like(Q)
    for i in range(Q.shape[0]):
        P[i] = np.prod(Q[Ancestors_and_self[i,:]])
    return P

In [6]:
# Define the function for calculating Q (conditional probability of Z given parent) from P (probabilities of Z)
def Q_from_P(P,A):
    '''Divide each structure's probability by the probability of its parent.

    Parameters
    ----------
    P : array of length M
        One probability per structure. Entry 0 is treated as the root.
    A : (M, M) boolean array
        Column i marks the parent of structure i (A[p, i] is True when p is
        the parent of i).

    Returns
    -------
    Q : float array of length M with Q[0] = P[0] and Q[i] = P[i] / P[parent(i)]
        for i >= 1.

    Raises
    ------
    ValueError
        If some structure i >= 1 does not have exactly one parent in A.

    The number of structures is taken from P rather than from a notebook-level
    variable, and the parent probability is pulled out as a scalar before the
    division (assigning a size-1 array into a scalar slot is deprecated in
    recent NumPy releases).
    '''
    P = np.asarray(P)
    if not np.issubdtype(P.dtype, np.floating):
        # Integer input would otherwise truncate every ratio to 0 or 1.
        P = P.astype(float)
    A = np.asarray(A, dtype=bool)

    Q = np.zeros_like(P)
    n_structures = P.shape[0]
    if n_structures == 0:
        return Q

    Q[0] = P[0]
    for i in range(1, n_structures):
        parent_probability = P[A[:,i]]
        if parent_probability.size != 1:
            raise ValueError(
                f"structure {i} has {parent_probability.size} parents in A; exactly one is required"
            )
        Q[i] = P[i] / parent_probability[0]
    return Q

In [7]:
# Define the function for estimating P
def estimate_P(X,mu,A,Descendants_and_self,draw=False,niter=100,P0=None,names=None,clip=0.001,is_leaf=None):
    '''Iteratively re-estimate the per-structure probabilities P from leaf data.

    Parameters
    ----------
    X : (N, m) array
        One row per sample and one column per leaf structure, in the order in
        which the leaves appear in is_leaf.
    mu : float
        Mean of the alternative density; each leaf is scored with
        phi(X, mu) / phi(X).
    A : (M, M) boolean array
        Child relation: row i marks the children of structure i and column i
        marks its parent (this is how Q_from_P and the adjustment below read it).
    Descendants_and_self : (M, M) boolean array
        Row i marks structure i together with all of its descendants.
    draw : int or bool
        If truthy, a figure is created and refreshed every `draw` iterations
        and on the last iteration.
    niter : int
        Number of update iterations. With niter == 0 the starting value is returned.
    P0 : array of length M, optional
        Starting value; defaults to 0.5 everywhere. It is not modified.
    names : sequence, optional
        Tick labels for the figure; only used when drawing.
    clip : float
        P is clipped to [clip, 1 - clip] before the odds P / (1 - P) are formed.
    is_leaf : boolean array of length M, optional
        Mask of leaf structures. When omitted it is derived from A (rows
        without any child), so the function no longer depends on a
        notebook-level `is_leaf` variable.

    Returns
    -------
    P : array of length M
        Mean over samples of 1 - exp(log_posterior), where exp(log_posterior)
        is the quantity plotted as 'P[Z=0|X] (prob not affected)'.

    Raises
    ------
    ValueError
        If the number of columns of X differs from the number of leaves.
    '''
    A = np.asarray(A, dtype=bool)
    Descendants_and_self = np.asarray(Descendants_and_self, dtype=bool)

    N = X.shape[0]
    m = X.shape[1]
    M = A.shape[0]

    if is_leaf is None:
        # A leaf is a structure whose row of A has no children.
        is_leaf = np.sum(A, 1) == 0
    else:
        is_leaf = np.asarray(is_leaf, dtype=bool)
    if m != np.count_nonzero(is_leaf):
        raise ValueError(
            f"X has {m} columns but there are {np.count_nonzero(is_leaf)} leaf structures"
        )

    if draw:
        f,ax = plt.subplots(2,2)
        if names is None:
            names = np.arange(M)

    # initialize
    if P0 is None:
        P = np.ones(M)*0.5
    else:
        P = np.asarray(P0)

    # Nothing below changes X, mu or the hierarchy, so evaluate these once.
    phi_affected = phi(X,mu)
    phi_unaffected = phi(X)
    leaf_descendants = Descendants_and_self[:,is_leaf] # row i: which leaf columns sit under structure i

    for it in range(niter):
        # Clip P away from 0 and 1 so that the odds below stay finite.
        P_ = np.maximum(P, clip)
        P_ = np.minimum(P_, 1-clip)
        P_over_one_minus_P = P_/(1.0-P_)

        # Per-leaf log posterior probability of "no effect".
        leaf_log_posterior = -np.log1p( P_over_one_minus_P[is_leaf]*phi_affected/phi_unaffected )

        # A structure is unaffected only if every leaf below it is unaffected:
        # sum the leaf terms over its leaf descendants.
        log_posterior = np.zeros((N,M))
        for i in range(M):
            log_posterior[:,i] = np.sum(leaf_log_posterior[:,leaf_descendants[i]],1)

        # Adjustment factor for correlations, one term per non-leaf structure.
        # NOTE(review): Q is built from the unclipped P, as in the original code;
        # a parent probability of exactly 0 would divide by zero in Q_from_P.
        Q = Q_from_P(P,A)
        log_adjustment_single = np.zeros(M)
        for i in range(M):
            if is_leaf[i]:
                continue
            log_adjustment_single[i] = -np.log1p(P_over_one_minus_P[i]*np.prod(1.0 - Q[A[i,:]]))

        # The adjustment of a structure accumulates the terms of all its descendants and itself.
        log_adjustment = np.zeros(M)
        for i in range(M):
            log_adjustment[i] = np.sum(log_adjustment_single[Descendants_and_self[i,:]])

        # calculate the adjusted posterior
        log_posterior = log_posterior + log_adjustment

        # expm1 keeps precision when the log posterior is close to zero.
        P = -np.sum(np.expm1(log_posterior),0)/N

        # draw
        if draw>0 and ( (not it%draw) or (it==niter-1)):
            posterior = np.exp(log_posterior)

            ax[0,0].cla()
            ax[0,0].imshow(posterior, vmin = 0, vmax = 1)
            ax[0,0].set_aspect('auto')
            ax[0,0].set_title('P[Z=0|X] (prob not affected)')
            ax[0,0].set_xticks(np.arange(M))
            ax[0,0].set_xticklabels(names,rotation=15, fontsize = 5)
            ax[0,0].set_ylabel('Sample')

            ax[0,1].cla()
            ax[0,1].bar(np.arange(M),P)
            ax[0,1].set_xticks(np.arange(M))
            ax[0,1].set_xticklabels(names,rotation=15, fontsize = 5)
            ax[0,1].set_ylim((0, 1))

            f.canvas.draw()
    return P

In [8]:
# Descendants[i, j] is True when structure j lies anywhere below structure i.
# Start from the direct children in S and keep adding one more generation until
# nothing changes, so the result is correct for a hierarchy of any depth
# (the earlier version applied a fixed six steps).
Descendants = np.copy(S)
while True:
    expanded = np.logical_or(Descendants, Descendants@S)
    if np.array_equal(expanded, Descendants):
        break
    Descendants = expanded

# Same relation with every structure also counted as its own descendant.
Descendants_and_self = np.logical_or(Descendants, np.eye(S.shape[0], dtype=bool))

## Experiment 1: clip the probabilities at 0.999 and 0.001 (original)

### Case 1: nothing is affected

In [9]:
# Experiment 1, case 1: no structure is affected (Z stays all zeros).

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the analysed group; must be > 0 or X[G] would be empty
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset1_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset1_1)[::-1]
    pval = np.zeros_like(P_subset1_1)
    experiment_1_1_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset1_1[idx])
        experiment_1_1_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset1_1[idx]}, p={pval[idx]}")

    file_name = f'case_1_experiment_1_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_1_1_outputs)
    

In [10]:
# Viewing the contents of one of the .npz files 
# Assign the file to an object called "test," which is a dictionary object
# Viewing the contents of one of the .npz files.
# np.load returns a dictionary-like archive; the with-block closes the
# underlying file once its contents have been printed.
with np.load("case_1_experiment_1_repeat_0000.npz") as test:
    # Print each key in the archive
    for key in test:
        print(key)

    # Print the values corresponding to this key
    print(test["arr_0"])

arr_0
['Limbic_L_434_3, P[Z=1|X]=0.037920449823881666, p=0.647'
 'CerebralCortex_L_482_4, P[Z=1|X]=0.037920449823881666, p=0.647'
 'Telencephalon_L_501_5, P[Z=1|X]=0.037920449823881666, p=0.647'
 'Everything, P[Z=1|X]=0.037920449823881666, p=0.647'
 'Hippo_L_75_1, P[Z=1|X]=0.0003457965129371767, p=0.88'
 'Hippo_L_338_2, P[Z=1|X]=0.0003457965129371767, p=0.88'
 'Amyg_L_73_1, P[Z=1|X]=8.875288279312776e-05, p=0.671'
 'Amyg_L_336_2, P[Z=1|X]=8.875288279312776e-05, p=0.671']


### Case 2: left hippocampus is affected

In [11]:
# Experiment 1, case 2: the left hippocampus leaf is affected in the first half of the samples.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
Z[:Naffected, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset1_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset1_2)[::-1]
    pval = np.zeros_like(P_subset1_2)
    experiment_1_2_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset1_2[idx])
        experiment_1_2_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset1_2[idx]}, p={pval[idx]}")

    file_name = f'case_2_experiment_1_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_1_2_outputs)
    

### Case 3: both are affected

In [12]:
# Experiment 1, case 3: both leaves are affected in the first half of the samples.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
Z[:Naffected, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected
Z[:Naffected, 7] = 1 # Left amygdala (Amyg_L_73_1) is affected

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset1_3 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset1_3)[::-1]
    pval = np.zeros_like(P_subset1_3)
    experiment_1_3_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset1_3[idx])
        experiment_1_3_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset1_3[idx]}, p={pval[idx]}")

    file_name = f'case_3_experiment_1_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_1_3_outputs)
    

### Case 4: exactly one of the two (left hippocampus or left amygdala, chosen at random per sample) is affected

In [9]:
# Experiment 1, case 4: each sample in the first half has exactly one affected
# leaf, picked at random between the left hippocampus and the left amygdala.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    # Z is random in this case, so it is redrawn for every repeat.
    Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
    for i in range(Naffected):
        if np.random.rand() < 0.5:
            Z[i, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected
        else:
            Z[i, 7] = 1 # Left amygdala (Amyg_L_73_1) is affected

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset1_4 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset1_4)[::-1]
    pval = np.zeros_like(P_subset1_4)
    experiment_1_4_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset1_4[idx])
        experiment_1_4_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset1_4[idx]}, p={pval[idx]}")

    file_name = f'case_4_experiment_1_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_1_4_outputs, Z) # Z is stored too because it differs between repeats
    

## Experiment 2: clip the probabilities at 0.9999 and 0.0001

### Case 1: nothing is affected

In [14]:
# Experiment 2, case 1: no structure is affected (Z stays all zeros); clip = 0.0001.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the analysed group; must be > 0 or X[G] would be empty
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs; clip=0.0001 is what defines this experiment
    P_subset2_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0,
                             P0=P0, niter=niter, names=names_subset, clip=0.0001)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0,clip=0.0001))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset2_1)[::-1]
    pval = np.zeros_like(P_subset2_1)
    experiment_2_1_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset2_1[idx])
        experiment_2_1_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset2_1[idx]}, p={pval[idx]}")

    file_name = f'case_1_experiment_2_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_2_1_outputs)
    

### Case 2: left hippocampus is affected

In [15]:
# Experiment 2, case 2: the left hippocampus leaf is affected in the first half of the samples; clip = 0.0001.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
Z[:Naffected, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs; clip=0.0001 is what defines this experiment
    P_subset2_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0,
                             P0=P0, niter=niter, names=names_subset, clip=0.0001)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0,clip=0.0001))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset2_2)[::-1]
    pval = np.zeros_like(P_subset2_2)
    experiment_2_2_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset2_2[idx])
        experiment_2_2_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset2_2[idx]}, p={pval[idx]}")

    file_name = f'case_2_experiment_2_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_2_2_outputs)
    

### Case 3: both are affected

In [16]:
# Experiment 2, case 3: both leaves are affected in the first half of the samples; clip = 0.0001.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
Z[:Naffected, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected
Z[:Naffected, 7] = 1 # Left amygdala (Amyg_L_73_1) is affected

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs; clip=0.0001 is what defines this experiment
    P_subset2_3 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0,
                             P0=P0, niter=niter, names=names_subset, clip=0.0001)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0,clip=0.0001))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset2_3)[::-1]
    pval = np.zeros_like(P_subset2_3)
    experiment_2_3_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset2_3[idx])
        experiment_2_3_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset2_3[idx]}, p={pval[idx]}")

    file_name = f'case_3_experiment_2_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_2_3_outputs)
    

### Case 4: exactly one of the two (left hippocampus or left amygdala, chosen at random per sample) is affected

In [10]:
# Experiment 2, case 4: each sample in the first half has exactly one affected
# leaf, picked at random between the left hippocampus and the left amygdala; clip = 0.0001.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    # Z is random in this case, so it is redrawn for every repeat.
    Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
    for i in range(Naffected):
        if np.random.rand() < 0.5:
            Z[i, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected
        else:
            Z[i, 7] = 1 # Left amygdala (Amyg_L_73_1) is affected

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs; clip=0.0001 is what defines this experiment
    P_subset2_4 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0,
                             P0=P0, niter=niter, names=names_subset, clip=0.0001)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0,clip=0.0001))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset2_4)[::-1]
    pval = np.zeros_like(P_subset2_4)
    experiment_2_4_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset2_4[idx])
        experiment_2_4_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset2_4[idx]}, p={pval[idx]}")

    file_name = f'case_4_experiment_2_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_2_4_outputs, Z) # Z is stored too because it differs between repeats
    

## Experiment 3: run 20 iterations of the EM algorithm (original)

### Case 1: nothing is affected

In [18]:
# Experiment 3, case 1: no structure is affected (Z stays all zeros); 20 iterations per fit.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the analysed group; must be > 0 or X[G] would be empty
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset3_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset3_1)[::-1]
    pval = np.zeros_like(P_subset3_1)
    experiment_3_1_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset3_1[idx])
        experiment_3_1_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset3_1[idx]}, p={pval[idx]}")

    file_name = f'case_1_experiment_3_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_3_1_outputs)
    

### Case 2: left hippocampus is affected

In [19]:
# Experiment 3, case 2: the left hippocampus leaf is affected in the first half of the samples; 20 iterations per fit.

### SETUP (identical for every repeat, so done once) ###

N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Iterations of estimate_P per fit
alpha = 0.05 # Nominal significance level; kept for reference, not applied in this cell

# Leaves are the structures with no children, i.e. the all-False rows of S.
# Reading the mask off S avoids assuming that the leaves are the last rows.
is_leaf = np.sum(S, 1) == 0
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = number_of_leaves

G = np.arange(N) < Naffected # True for the first Naffected samples: the affected group that gets analysed
P0 = np.ones(M) * 0.5 # Starting value for every fit

Z = np.zeros((N,M)) # Binary indicator of which structure is affected in which sample
Z[:Naffected, 6] = 1 # Left hippocampus (Hippo_L_75_1) is affected

for j in range(n_repeats):

    ### GENERATING SAMPLES ###

    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    # draw=0 prevents drawing the graphs
    P_subset3_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)

    ### GENERATING PERMUTED DATA ###

    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]] # Random group of Naffected samples
        Ps.append(estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0))

    # Each permutation's estimates sorted from largest to smallest
    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps])

    ### PERMUTATION TESTING ###

    # The k-th largest observed estimate is compared with the k-th largest
    # estimate of every permutation.
    inds = np.argsort(P_subset3_2)[::-1]
    pval = np.zeros_like(P_subset3_2)
    experiment_3_2_outputs = [] # One entry per structure, largest estimate first
    for rank, idx in enumerate(inds):
        pval[idx] = np.mean(Ps_sort[:,rank] >= P_subset3_2[idx])
        experiment_3_2_outputs.append(f"{names_subset[idx]}, P[Z=1|X]={P_subset3_2[idx]}, p={pval[idx]}")

    file_name = f'case_2_experiment_3_repeat_{j:04}.npz' # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_3_2_outputs)
    

### Case 3: both are affected

In [9]:
# Experiment 3, case 3: both the left hippocampus (column 6 of Z) and the left amygdala (column 7)
# are shifted by mu in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Number of iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_3_3_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1 # Left amygdala is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset3_3 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset3_3)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset3_3)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset3_3[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_3_3_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset3_3[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_3_experiment_3_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_3_3_outputs)
    

### Case 4: one is affected

In [11]:
# Experiment 3, case 4: in every affected sample exactly one of the two leaves is shifted by mu,
# the left hippocampus (column 6 of Z) or the left amygdala (column 7), chosen with probability 0.5 each.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Number of iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_3_4_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    hippocampus_hit = np.random.rand(Naffected) < 0.5 # One fair coin flip per affected sample
    Z[:Naffected, 6] = hippocampus_hit # Left hippocampus is affected where the flip came up True
    Z[:Naffected, 7] = ~hippocampus_hit # Left amygdala is affected otherwise

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset3_4 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset3_4)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset3_4)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset3_4[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_3_4_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset3_4[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_4_experiment_3_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    # Z is saved as well because the affected leaf is random in this case
    np.savez(file_name, experiment_3_4_outputs, Z)
    

## Experiment 4: run 50 iterations of the EM algorithm 

### Case 1: nothing is affected

In [11]:
# Experiment 4 (50 EM iterations), case 1: no structure is affected, so Z stays all zeros
# and both groups are drawn from the same distribution.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the tested group; don't set Naffected to 0 or else there won't be any samples
niter = 50 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the tested group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_4_1_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i (none here)

    # Only the leaf columns are observed; with Z all zeros this is pure standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset4_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset4_1)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset4_1)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset4_1[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_4_1_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset4_1[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_1_experiment_4_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_4_1_outputs)
    

### Case 2: left hippocampus is affected

In [9]:
# Experiment 4 (50 EM iterations), case 2: the left hippocampus (column 6 of Z) is shifted by mu
# in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 50 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_4_2_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset4_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset4_2)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset4_2)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset4_2[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_4_2_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset4_2[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_2_experiment_4_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_4_2_outputs)
    

### Case 3: both are affected

In [10]:
# Experiment 4 (50 EM iterations), case 3: both the left hippocampus (column 6 of Z) and the
# left amygdala (column 7) are shifted by mu in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 50 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_4_3_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1 # Left amygdala is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset4_3 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset4_3)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset4_3)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset4_3[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_4_3_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset4_3[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_3_experiment_4_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_4_3_outputs)
    

### Case 4: one is affected

In [12]:
# Experiment 4 (50 EM iterations), case 4: in every affected sample exactly one of the two leaves
# is shifted by mu, the left hippocampus (column 6 of Z) or the left amygdala (column 7),
# chosen with probability 0.5 each.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 50 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_4_4_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    hippocampus_hit = np.random.rand(Naffected) < 0.5 # One fair coin flip per affected sample
    Z[:Naffected, 6] = hippocampus_hit # Left hippocampus is affected where the flip came up True
    Z[:Naffected, 7] = ~hippocampus_hit # Left amygdala is affected otherwise

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset4_4 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset4_4)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset4_4)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset4_4[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_4_4_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset4_4[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_4_experiment_4_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    # Z is saved as well because the affected leaf is random in this case
    np.savez(file_name, experiment_4_4_outputs, Z)
    

## Experiment 5: run 100 iterations of the EM algorithm

### Case 1: nothing is affected

In [10]:
# Experiment 5 (100 EM iterations), case 1: no structure is affected, so Z stays all zeros
# and both groups are drawn from the same distribution.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the tested group; don't set Naffected to 0 or else there won't be any samples
niter = 100 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the tested group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_5_1_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i (none here)

    # Only the leaf columns are observed; with Z all zeros this is pure standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset5_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset5_1)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset5_1)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset5_1[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_5_1_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset5_1[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_1_experiment_5_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_5_1_outputs)
    

### Case 2: left hippocampus is affected

In [9]:
# Experiment 5 (100 EM iterations), case 2: the left hippocampus (column 6 of Z) is shifted by mu
# in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 100 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_5_2_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset5_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset5_2)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset5_2)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset5_2[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_5_2_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset5_2[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_2_experiment_5_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_5_2_outputs)
    

### Case 3: both are affected

In [10]:
# Experiment 5 (100 EM iterations), case 3: both the left hippocampus (column 6 of Z) and the
# left amygdala (column 7) are shifted by mu in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 100 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_5_3_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1 # Left amygdala is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset5_3 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset5_3)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset5_3)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset5_3[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_5_3_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset5_3[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_3_experiment_5_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_5_3_outputs)
    

### Case 4: one is affected

In [9]:
# Experiment 5 (100 EM iterations), case 4: in every affected sample exactly one of the two leaves
# is shifted by mu, the left hippocampus (column 6 of Z) or the left amygdala (column 7),
# chosen with probability 0.5 each.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 100 # Number of EM iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_5_4_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    hippocampus_hit = np.random.rand(Naffected) < 0.5 # One fair coin flip per affected sample
    Z[:Naffected, 6] = hippocampus_hit # Left hippocampus is affected where the flip came up True
    Z[:Naffected, 7] = ~hippocampus_hit # Left amygdala is affected otherwise

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset5_4 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset5_4)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset5_4)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset5_4[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_5_4_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset5_4[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_4_experiment_5_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    # Z is saved as well because the affected leaf is random in this case
    np.savez(file_name, experiment_5_4_outputs, Z)
    

## Experiment 6: initial probabilities are 0.5 (original)

### Case 1: nothing is affected

In [12]:
# Experiment 6 (initial probabilities of 0.5), case 1: no structure is affected, so Z stays all
# zeros and both groups are drawn from the same distribution.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Size of the tested group; don't set Naffected to 0 or else there won't be any samples
niter = 20 # Number of iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the tested group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_6_1_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i (none here)

    # Only the leaf columns are observed; with Z all zeros this is pure standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset6_1 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset6_1)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset6_1)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset6_1[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_6_1_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset6_1[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_1_experiment_6_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_6_1_outputs)
    

### Case 2: left hippocampus is affected

In [13]:
# Experiment 6 (initial probabilities of 0.5), case 2: the left hippocampus (column 6 of Z) is
# shifted by mu in the affected group.
# Each repeat simulates data, estimates P, runs a permutation test and saves one .npz file.

# Settings that are identical for every repeat are defined once, before the loop.
N = 20 # Number of samples
mu = 3.0 # Difference in mean, note that this is generally unknown.
M = subset.shape[0] # Number of total unique structures
Naffected = N // 2 # Affected samples (half), note that this is generally unknown
niter = 20 # Number of iterations passed to estimate_P

# A structure is a leaf when its row of the adjacency table has no children (row sum of zero).
# Reading the mask from the table avoids assuming that the leaves are the last rows.
is_leaf = np.asarray(np.sum(subset, 1) == 0, dtype = bool)
number_of_leaves = np.count_nonzero(is_leaf) # Number of leaf structures (zero children)
m = np.sum(is_leaf) # Number of leaf structures = number of observed values per sample

G = np.arange(N) < Naffected # True for the first Naffected samples (the affected group), False for the rest

Q = Q_from_P(np.ones(M) * 0.5, S) # Conditional probabilities for a flat P of 0.5; not used further in this cell

for j in range(n_repeats):
    experiment_6_2_outputs = [] # Empty list for each iteration

    ### GENERATING SAMPLES ###

    Z = np.zeros((N,M)) # Binary indicator: Z[i, k] = 1 when structure k is affected in sample i
    Z[:Naffected, 6] = 1 # Left hippocampus is affected in the first Naffected samples

    # Only the leaf columns are observed: mean mu where affected, 0 otherwise, plus standard normal noise
    X = Z[:, is_leaf] * mu + np.random.randn(N, m)

    ### PARAMETER ESTIMATION ###

    P0 = np.ones(M) * 0.5 # Initial guess: M copies of 0.5 (rebuilt every repeat)
    P_subset6_2 = estimate_P(X[G], mu, S, Descendants_and_self, draw=0, P0=P0, niter=niter, names=names_subset)
    # Set draw = 0 to prevent drawing the graphs

    ### GENERATING PERMUTED DATA ###

    # Each permutation picks Naffected rows at random (without replacement) from all N samples
    # and re-estimates P on them.
    Ps = []
    for n in range(nperm):
        Xp = X[np.random.permutation(N)[G]]
        P_ = estimate_P(Xp,mu,S,Descendants_and_self,draw=0,niter=niter,P0=P0)
        Ps.append(P_)

    Ps_sort = np.array([np.sort(Pi)[::-1] for Pi in Ps]) # Every permutation's estimates, largest first

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset6_2)[::-1] # Structure indices ordered from largest to smallest estimate
    pval = np.zeros_like(P_subset6_2)
    alpha = 0.05 # Significance level; not applied in this cell, every structure is recorded
    for i in range(M):
        # Fraction of permutations whose i-th largest estimate is at least the observed i-th largest estimate
        pval[inds[i]] = np.mean(Ps_sort[:,i] >= P_subset6_2[inds[i]])

        # Every structure gets an entry, whether or not it would be rejected
        experiment_6_2_outputs.append(f"{names_subset[inds[i]]}, P[Z=1|X]={P_subset6_2[inds[i]]}, p={pval[inds[i]]}")

    file_name = f'case_2_experiment_6_repeat_{j:04}.npz' # format j so that it's 0 padded on the left with 4 characters
    np.savez(file_name, experiment_6_2_outputs)
    

### Case 3: both are affected

In [14]:
for j in range(n_repeats):
    # Experiment 6, case 3 -- one simulated repeat per pass: both the left hippocampus
    # and the left amygdala are shifted in the affected group; initial probabilities are 0.5.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    Z[:Naffected, 6] = 1  # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1  # Left amygdala is affected in the first Naffected samples

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset6_3 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset6_3, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.5)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset6_3 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset6_3)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset6_3)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_6_3_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset6_3[k])
        # There is no early stopping, so every structure gets an entry
        experiment_6_3_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset6_3[k]}, p={pval[k]}")

    file_name = f'case_3_experiment_6_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_6_3_outputs)
    

### Case 4: one is affected (left hippocampus or left amygdala, chosen at random for each affected sample)

In [10]:
for j in range(n_repeats):
    # Experiment 6, case 4 -- one simulated repeat per pass: each affected sample has
    # either the left hippocampus or the left amygdala shifted; initial probabilities are 0.5.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    for i in range(Naffected):
        # One uniform draw per affected sample decides which leaf is affected
        if np.random.rand() >= 0.5:
            Z[i, 7] = 1  # Left amygdala is affected
        else:
            Z[i, 6] = 1  # Left hippocampus is affected

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset6_4 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset6_4, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.5)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset6_4 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset6_4)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset6_4)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_6_4_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset6_4[k])
        # There is no early stopping, so every structure gets an entry
        experiment_6_4_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset6_4[k]}, p={pval[k]}")

    file_name = f'case_4_experiment_6_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    # Z is stored as well because the affected leaf differs between samples and repeats
    np.savez(file_name, experiment_6_4_outputs, Z)
    

## Experiment 7: initial probabilities are 0.25

### Case 1: nothing is affected

In [16]:
for j in range(n_repeats):
    # Experiment 7, case 1 -- one simulated repeat per pass: no structure is affected
    # and the initial probabilities are 0.25.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    # Kept at N // 2 even though nothing is affected: with Naffected = 0, G would select no samples
    Naffected = N // 2
    Z = np.zeros((N, M))  # Z stays all zeros: no structure is affected in any sample

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset7_1 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset7_1, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.25)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset7_1 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset7_1)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset7_1)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_7_1_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset7_1[k])
        # There is no early stopping, so every structure gets an entry
        experiment_7_1_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset7_1[k]}, p={pval[k]}")

    file_name = f'case_1_experiment_7_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_7_1_outputs)
    

### Case 2: left hippocampus is affected

In [9]:
for j in range(n_repeats):
    # Experiment 7, case 2 -- one simulated repeat per pass: the left hippocampus
    # is shifted in the affected group and the initial probabilities are 0.25.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    Z[:Naffected, 6] = 1  # Left hippocampus is affected in the first Naffected samples

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset7_2 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset7_2, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.25)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset7_2 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset7_2)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset7_2)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_7_2_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset7_2[k])
        # There is no early stopping, so every structure gets an entry
        experiment_7_2_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset7_2[k]}, p={pval[k]}")

    file_name = f'case_2_experiment_7_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_7_2_outputs)
    

### Case 3: both are affected

In [10]:
for j in range(n_repeats):
    # Experiment 7, case 3 -- one simulated repeat per pass: both the left hippocampus
    # and the left amygdala are shifted in the affected group; initial probabilities are 0.25.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    Z[:Naffected, 6] = 1  # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1  # Left amygdala is affected in the first Naffected samples

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset7_3 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset7_3, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.25)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset7_3 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset7_3)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset7_3)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_7_3_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset7_3[k])
        # There is no early stopping, so every structure gets an entry
        experiment_7_3_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset7_3[k]}, p={pval[k]}")

    file_name = f'case_3_experiment_7_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_7_3_outputs)
    

### Case 4: one is affected (left hippocampus or left amygdala, chosen at random for each affected sample)

In [11]:
for j in range(n_repeats):
    # Experiment 7, case 4 -- one simulated repeat per pass: each affected sample has
    # either the left hippocampus or the left amygdala shifted; initial probabilities are 0.25.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    for i in range(Naffected):
        # One uniform draw per affected sample decides which leaf is affected
        if np.random.rand() >= 0.5:
            Z[i, 7] = 1  # Left amygdala is affected
        else:
            Z[i, 6] = 1  # Left hippocampus is affected

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset7_4 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset7_4, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.25)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset7_4 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset7_4)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset7_4)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_7_4_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset7_4[k])
        # There is no early stopping, so every structure gets an entry
        experiment_7_4_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset7_4[k]}, p={pval[k]}")

    file_name = f'case_4_experiment_7_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    # Z is stored as well because the affected leaf differs between samples and repeats
    np.savez(file_name, experiment_7_4_outputs, Z)
    

## Experiment 8: initial probabilities are 0.75

### Case 1: nothing is affected

In [12]:
for j in range(n_repeats):
    # Experiment 8, case 1 -- one simulated repeat per pass: no structure is affected
    # and the initial probabilities are 0.75.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    # Kept at N // 2 even though nothing is affected: with Naffected = 0, G would select no samples
    Naffected = N // 2
    Z = np.zeros((N, M))  # Z stays all zeros: no structure is affected in any sample

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset8_1 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset8_1, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.75)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset8_1 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset8_1)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset8_1)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_8_1_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset8_1[k])
        # There is no early stopping, so every structure gets an entry
        experiment_8_1_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset8_1[k]}, p={pval[k]}")

    file_name = f'case_1_experiment_8_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_8_1_outputs)
    

### Case 2: left hippocampus is affected

In [13]:
for j in range(n_repeats):
    # Experiment 8, case 2 -- one simulated repeat per pass: the left hippocampus
    # is shifted in the affected group and the initial probabilities are 0.75.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    Z[:Naffected, 6] = 1  # Left hippocampus is affected in the first Naffected samples

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset8_2 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset8_2, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.75)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset8_2 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset8_2)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset8_2)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_8_2_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset8_2[k])
        # Report this cell's own estimate (P_subset8_2) next to its p-value so the
        # saved probability and p-value describe the same fit. There is no early
        # stopping, so every structure gets an entry.
        experiment_8_2_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset8_2[k]}, p={pval[k]}")

    file_name = f'case_2_experiment_8_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_8_2_outputs)
    

### Case 3: both are affected

In [14]:
for j in range(n_repeats):
    # Experiment 8, case 3 -- one simulated repeat per pass: both the left hippocampus
    # and the left amygdala are shifted in the affected group; initial probabilities are 0.75.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    Z[:Naffected, 6] = 1  # Left hippocampus is affected in the first Naffected samples
    Z[:Naffected, 7] = 1  # Left amygdala is affected in the first Naffected samples

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset8_3 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset8_3, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.75)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset8_3 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset8_3)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset8_3)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_8_3_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset8_3[k])
        # There is no early stopping, so every structure gets an entry
        experiment_8_3_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset8_3[k]}, p={pval[k]}")

    file_name = f'case_3_experiment_8_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    np.savez(file_name, experiment_8_3_outputs)
    

### Case 4: one is affected (left hippocampus or left amygdala, chosen at random for each affected sample)

In [12]:
for j in range(n_repeats):
    # Experiment 8, case 4 -- one simulated repeat per pass: each affected sample has
    # either the left hippocampus or the left amygdala shifted; initial probabilities are 0.75.

    ### GENERATING SAMPLES ###

    N = 20  # Number of samples
    mu = 3.0  # Mean shift of an affected leaf; generally unknown in practice
    M = subset.shape[0]  # Number of total unique structures
    # A structure whose row of `subset` sums to zero has no children, i.e. it is a leaf
    number_of_leaves = np.count_nonzero(subset.sum(axis=1) == 0)

    Naffected = N // 2  # Size of the affected group; generally unknown in practice
    Z = np.zeros((N, M))  # Z[i, k] = 1 marks structure k as affected in sample i
    for i in range(Naffected):
        # One uniform draw per affected sample decides which leaf is affected
        if np.random.rand() >= 0.5:
            Z[i, 7] = 1  # Left amygdala is affected
        else:
            Z[i, 6] = 1  # Left hippocampus is affected

    # Boolean leaf mask; it assumes the leaves are the last rows of `subset` (data specific)
    is_leaf = np.arange(M) >= M - number_of_leaves
    m = is_leaf.sum()  # Number of leaf structures (m = 2)

    G = np.arange(N) < Naffected  # True for the first Naffected samples, False for the rest
    X = mu * Z[:, is_leaf] + np.random.randn(N, m)  # Leaf observations: shift plus standard normal noise

    ### PARAMETER ESTIMATION ###

    P_subset8_4 = np.full(M, 0.5)  # Placeholder of M copies of 0.5, replaced by the estimate below
    Q = Q_from_P(P_subset8_4, S)  # Conditional probabilities derived from the placeholder

    P0 = np.full(M, 0.75)  # Initial probabilities passed to estimate_P
    niter = 20  # Passed to estimate_P as niter
    # Estimate from the rows selected by G only; draw=0 prevents drawing the graphs
    P_subset8_4 = estimate_P(
        X[G], mu, S, Descendants_and_self,
        draw=0, P0=P0, niter=niter, names=names_subset,
    )

    ### GENERATING PERMUTED DATA ###

    # Each entry re-estimates P from rows picked by shuffling all N sample indices
    # and keeping the positions where G is True.
    Ps = [
        estimate_P(X[np.random.permutation(N)[G]], mu, S, Descendants_and_self,
                   draw=0, niter=niter, P0=P0)
        for _ in range(nperm)
    ]

    # Sort every permutation estimate in descending order (one row per permutation)
    Ps_sort = np.sort(np.array(Ps), axis=1)[:, ::-1]

    ### PERMUTATION TESTING ###

    inds = np.argsort(P_subset8_4)[::-1]  # Structure indices, largest estimate first
    pval = np.zeros_like(P_subset8_4)
    alpha = 0.05  # Assigned but not used by the loop below
    experiment_8_4_outputs = []  # Fresh list for each repeat
    for i, k in enumerate(inds):
        # p-value: fraction of permutations whose rank-i estimate is at least the observed one
        pval[k] = np.mean(Ps_sort[:, i] >= P_subset8_4[k])
        # There is no early stopping, so every structure gets an entry
        experiment_8_4_outputs.append(f"{names_subset[k]}, P[Z=1|X]={P_subset8_4[k]}, p={pval[k]}")

    file_name = f'case_4_experiment_8_repeat_{j:04}.npz'  # j is zero-padded on the left to 4 characters
    # Z is stored as well because the affected leaf differs between samples and repeats
    np.savez(file_name, experiment_8_4_outputs, Z)
    