# 1. Setup

In [1]:
# Import libraries and all new functions

import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import re
from glob import glob
import os
from new_functions_py_file import *

In [2]:
# Load the two CSV inputs (first row as header, first column as index) and
# run each through its cleaning helper (presumably provided by the
# `new_functions_py_file` star import).

adjacency_matrix = clean_adjacency_mat(
    pd.read_csv("adjacency_matrix2.csv", header=0, index_col=0)
)

# The multilevel cleaner is also handed the already-cleaned adjacency matrix.
multilevel = clean_multilevel(
    pd.read_csv("multilevel2.csv", header=0, index_col=0),
    adjacency_matrix,
)

In [3]:
# Choose the leaf labels of interest, then build the subset adjacency matrix
# from them together with the cleaned adjacency matrix and multilevel table.

subset_leaf_list = [
    "Amyg_L_73_1",
    "Hippo_L_75_1",
]
subset = subset_matrix_creator(
    subset_leaf_list,
    adjacency_matrix,
    multilevel,
)

In [4]:
# Build the descendants and ancestors matrices from the subset matrix.
# Both helpers receive the same N and mu, so the values are written once.

_shared_settings = {"N": 20, "mu": 3.0}

descendants = adjacency_descendants(subset, **_shared_settings)
ancestors = adjacency_ancestors(subset, **_shared_settings)

# 2. Generate simulated data

In [5]:
# Seed NumPy's global RNG once before generating anything
# (presumably generate_simulated_data draws from np.random's global state,
# which would make the four data sets reproducible -- TODO confirm).
np.random.seed(5)

# One output filename per simulation case; cases are numbered from 1 and are
# generated in the same order as before (1, 2, 3, 4).
_simulated_outputs = (
    "test1_data.npz",
    "test2_data.npz",
    "test3_data.npz",
    "test4_data.npz",
)
for _case, _filename in enumerate(_simulated_outputs, start=1):
    generate_simulated_data(
        filename=_filename,
        subset=subset,
        case=_case,
        n_repeats=100,
        N=20,
        mu=3.0,
    )

In [6]:
# Check the test 1 data files that were created (100 repeats -> 100 .npz files)
# by printing the "X", "Z" and "G" arrays of the first five files.

test1_filenames = sorted(glob("test1_data_repeat_*"))

# Slice rather than index with range(0, 5): if fewer than five files match the
# pattern, the preview simply shows what exists instead of raising IndexError.
for test1_filename in test1_filenames[:5]:
    # np.load on an .npz archive returns an NpzFile that keeps the file open;
    # the context manager closes it once the arrays have been printed.
    with np.load(test1_filename) as test:
        print("\033[1m" + test1_filename + "\033[0m")
        for key in ("X", "Z", "G"):
            # Bold heading (ANSI escape codes) followed by the stored array
            print("\033[1m" + key + "\033[0m")
            print(test[key])
        print("\n")

[1mtest1_data_repeat_000000.npz[0m
[1mX[0m
[[ 0.44122749 -0.33087015]
 [ 2.43077119 -0.25209213]
 [ 0.10960984  1.58248112]
 [-0.9092324  -0.59163666]
 [ 0.18760323 -0.32986996]
 [-1.19276461 -0.20487651]
 [-0.35882895  0.6034716 ]
 [-1.66478853 -0.70017904]
 [ 1.15139101  1.85733101]
 [-1.51117956  0.64484751]
 [-0.98060789 -0.85685315]
 [-0.87187918 -0.42250793]
 [ 0.99643983  0.71242127]
 [ 0.05914424 -0.36331088]
 [ 0.00328884 -0.10593044]
 [ 0.79305332 -0.63157163]
 [-0.00619491 -0.10106761]
 [-0.05230815  0.24921766]
 [ 0.19766009  1.33484857]
 [-0.08687561  1.56153229]]
[1mZ[0m
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0

# 3. Conduct permutation testing

In [7]:
# Case 1: run permutation testing over the simulated repeats matching
# "test1_data_repeat_*"; results are saved under the "test1_results.npz" name
# (presumably one file per repeat, matching "test1_results_repeat_*" -- verify).
# NOTE(review): with nperm=10 the p-values displayed below are all multiples
# of 0.1 -- confirm this resolution is intended.
permutation_testing(
    filename_old="test1_data_repeat_*",
    filename_new="test1_results.npz",
    subset=subset,
    n_repeats=100,
    nperm=10,
    N=20,
    mu=3.0,
    mu0=0.0,
    mu1=3.0,
    sigma2=1.0,
    niter=5,
    initial_prob=0.5,
    clip=0.001,
)

{'p-values': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 'names': ['Limbic_L_434_3',
  'CerebralCortex_L_482_4',
  'Telencephalon_L_501_5',
  'Everything',
  'Amyg_L_73_1',
  'Amyg_L_336_2',
  'Hippo_L_75_1',
  'Hippo_L_338_2'],
 'posterior': [0.46353548406317174,
  0.46353548406317174,
  0.46353548406317174,
  0.46353548406317174,
  0.1674973330141576,
  0.1674973330141576,
  0.1576251896237655,
  0.1576251896237655],
 'strings': ['Everything, P[Z=1|X]=0.46353548406317174, p=1.0',
  'Telencephalon_L_501_5, P[Z=1|X]=0.46353548406317174, p=1.0',
  'CerebralCortex_L_482_4, P[Z=1|X]=0.46353548406317174, p=1.0',
  'Limbic_L_434_3, P[Z=1|X]=0.46353548406317174, p=1.0',
  'Amyg_L_336_2, P[Z=1|X]=0.1674973330141576, p=1.0',
  'Amyg_L_73_1, P[Z=1|X]=0.1674973330141576, p=1.0',
  'Hippo_L_338_2, P[Z=1|X]=0.1576251896237655, p=1.0',
  'Hippo_L_75_1, P[Z=1|X]=0.1576251896237655, p=1.0'],
 'P_subset': array([0.46353548, 0.46353548, 0.46353548, 0.46353548, 0.15762519,
        0.16749733, 0.15762519,

In [8]:
# Case 2: run permutation testing over the simulated repeats matching
# "test2_data_repeat_*"; results are saved under the "test2_results.npz" name.
# NOTE(review): clip=0.0001 here, whereas the other permutation_testing cells
# use 0.001 or 0.01 -- confirm the per-case value is intentional.
permutation_testing(
    filename_old="test2_data_repeat_*",
    filename_new="test2_results.npz",
    subset=subset,
    n_repeats=100,
    nperm=10,
    N=20,
    mu=3.0,
    mu0=0.0,
    mu1=3.0,
    sigma2=1.0,
    niter=5,
    clip=0.0001,
    initial_prob=0.5,
)

{'p-values': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8, 0.8],
 'names': ['Limbic_L_434_3',
  'CerebralCortex_L_482_4',
  'Telencephalon_L_501_5',
  'Everything',
  'Hippo_L_75_1',
  'Hippo_L_338_2',
  'Amyg_L_73_1',
  'Amyg_L_336_2'],
 'posterior': [0.7119158910094903,
  0.7119158910094903,
  0.7119158910094903,
  0.7119158910094903,
  0.5180468573519893,
  0.5180468573519893,
  0.12707237798942403,
  0.12707237798942403],
 'strings': ['Everything, P[Z=1|X]=0.7119158910094903, p=1.0',
  'Telencephalon_L_501_5, P[Z=1|X]=0.7119158910094903, p=1.0',
  'CerebralCortex_L_482_4, P[Z=1|X]=0.7119158910094903, p=1.0',
  'Limbic_L_434_3, P[Z=1|X]=0.7119158910094903, p=1.0',
  'Hippo_L_338_2, P[Z=1|X]=0.5180468573519893, p=1.0',
  'Hippo_L_75_1, P[Z=1|X]=0.5180468573519893, p=1.0',
  'Amyg_L_336_2, P[Z=1|X]=0.12707237798942403, p=0.8',
  'Amyg_L_73_1, P[Z=1|X]=0.12707237798942403, p=0.8'],
 'P_subset': array([0.71191589, 0.71191589, 0.71191589, 0.71191589, 0.51804686,
        0.12707238, 0.51804686, 0.1

In [9]:
# Case 3: run permutation testing over the simulated repeats matching
# "test3_data_repeat_*"; results are saved under the "test3_results.npz" name.
# NOTE(review): initial_prob=0.25 and clip=0.01 differ from the other
# permutation_testing cells -- confirm the per-case values are intentional.
permutation_testing(
    filename_old="test3_data_repeat_*",
    filename_new="test3_results.npz",
    subset=subset,
    n_repeats=100,
    nperm=10,
    N=20,
    mu=3.0,
    mu0=0.0,
    mu1=3.0,
    sigma2=1.0,
    niter=5,
    initial_prob=0.25,
    clip=0.01,
)

{'p-values': [1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.0, 0.0],
 'names': ['Limbic_L_434_3',
  'CerebralCortex_L_482_4',
  'Telencephalon_L_501_5',
  'Everything',
  'Amyg_L_73_1',
  'Amyg_L_336_2',
  'Hippo_L_75_1',
  'Hippo_L_338_2'],
 'posterior': [0.6574074571105788,
  0.6574074571105788,
  0.6574074571105788,
  0.6574074571105788,
  0.494605548480706,
  0.494605548480706,
  0.4942078709730948,
  0.4942078709730948],
 'strings': ['Everything, P[Z=1|X]=0.6574074571105788, p=1.0',
  'Telencephalon_L_501_5, P[Z=1|X]=0.6574074571105788, p=1.0',
  'CerebralCortex_L_482_4, P[Z=1|X]=0.6574074571105788, p=1.0',
  'Limbic_L_434_3, P[Z=1|X]=0.6574074571105788, p=1.0',
  'Amyg_L_336_2, P[Z=1|X]=0.494605548480706, p=0.9',
  'Amyg_L_73_1, P[Z=1|X]=0.494605548480706, p=0.9',
  'Hippo_L_338_2, P[Z=1|X]=0.4942078709730948, p=0.0',
  'Hippo_L_75_1, P[Z=1|X]=0.4942078709730948, p=0.0'],
 'P_subset': array([0.65740746, 0.65740746, 0.65740746, 0.65740746, 0.49420787,
        0.49460555, 0.49420787, 0.49460555]

In [10]:
# Case 4: run permutation testing over the simulated repeats matching
# "test4_data_repeat_*"; results are saved under the "test4_results.npz" name.
# NOTE(review): initial_prob=0.75 differs from the other permutation_testing
# cells (0.5, 0.5, 0.25) -- confirm the per-case value is intentional.
permutation_testing(
    filename_old="test4_data_repeat_*",
    filename_new="test4_results.npz",
    subset=subset,
    n_repeats=100,
    nperm=10,
    N=20,
    mu=3.0,
    mu0=0.0,
    mu1=3.0,
    sigma2=1.0,
    niter=5,
    initial_prob=0.75,
    clip=0.001,
)

{'p-values': [0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 0.4, 0.4],
 'names': ['Limbic_L_434_3',
  'CerebralCortex_L_482_4',
  'Telencephalon_L_501_5',
  'Everything',
  'Hippo_L_75_1',
  'Hippo_L_338_2',
  'Amyg_L_73_1',
  'Amyg_L_336_2'],
 'posterior': [0.8438089364885814,
  0.8438089364885814,
  0.8438089364885814,
  0.8438089364885814,
  0.4573941915680505,
  0.4573941915680505,
  0.3948504358248672,
  0.3948504358248672],
 'strings': ['Everything, P[Z=1|X]=0.8438089364885814, p=0.5',
  'Telencephalon_L_501_5, P[Z=1|X]=0.8438089364885814, p=0.5',
  'CerebralCortex_L_482_4, P[Z=1|X]=0.8438089364885814, p=0.5',
  'Limbic_L_434_3, P[Z=1|X]=0.8438089364885814, p=0.5',
  'Hippo_L_338_2, P[Z=1|X]=0.4573941915680505, p=1.0',
  'Hippo_L_75_1, P[Z=1|X]=0.4573941915680505, p=1.0',
  'Amyg_L_336_2, P[Z=1|X]=0.3948504358248672, p=0.4',
  'Amyg_L_73_1, P[Z=1|X]=0.3948504358248672, p=0.4'],
 'P_subset': array([0.84380894, 0.84380894, 0.84380894, 0.84380894, 0.45739419,
        0.39485044, 0.45739419, 0.39485

In [11]:
# Check the test 1 results files that were created (100 repeats -> 100 .npz files)
# by printing the stored fields of the first five files.

test1_filenames = sorted(glob("test1_results_repeat_*"))

# (heading to print, key under which the array is stored in the .npz file)
_result_fields = (
    ("p-values:", "pval"),
    ("names:", "names"),
    ("posterior:", "posterior"),
    ("strings:", "strings"),
)

# Slice rather than index with range(0, 5): if fewer than five files match the
# pattern, the preview simply shows what exists instead of raising IndexError.
for test1_filename in test1_filenames[:5]:
    # "test" is the dict-like NpzFile for this results file; it keeps the file
    # open, so the context manager closes it once the values are printed.
    with np.load(test1_filename) as test:
        print("\033[1m" + test1_filename + "\033[0m")

        # Print the values corresponding to each key under a bold heading
        for heading, key in _result_fields:
            print("\033[1m" + heading + "\033[0m")
            print(test[key])
        print("\n")

[1mtest1_results_repeat_000000.npz[0m
[1mp-values:[0m
[0.9 0.9 0.9 0.9 0.7 0.7 0.9 0.9]
[1mnames:[0m
['Limbic_L_434_3' 'CerebralCortex_L_482_4' 'Telencephalon_L_501_5'
 'Everything' 'Amyg_L_73_1' 'Amyg_L_336_2' 'Hippo_L_75_1' 'Hippo_L_338_2']
[1mposterior:[0m
[0.31859903 0.31859903 0.31859903 0.31859903 0.12819191 0.12819191
 0.08477612 0.08477612]
[1mstrings:[0m
['Everything, P[Z=1|X]=0.31859903413239576, p=0.9'
 'Telencephalon_L_501_5, P[Z=1|X]=0.31859903413239576, p=0.9'
 'CerebralCortex_L_482_4, P[Z=1|X]=0.31859903413239576, p=0.9'
 'Limbic_L_434_3, P[Z=1|X]=0.31859903413239576, p=0.9'
 'Amyg_L_336_2, P[Z=1|X]=0.12819190700777114, p=0.7'
 'Amyg_L_73_1, P[Z=1|X]=0.12819190700777114, p=0.7'
 'Hippo_L_338_2, P[Z=1|X]=0.08477612216937935, p=0.9'
 'Hippo_L_75_1, P[Z=1|X]=0.08477612216937935, p=0.9']


[1mtest1_results_repeat_000001.npz[0m
[1mp-values:[0m
[0.4 0.4 0.4 0.4 0.3 0.3 0.5 0.5]
[1mnames:[0m
['Limbic_L_434_3' 'CerebralCortex_L_482_4' 'Telencephalon_L_501_5'
 'Ev

# 4. Calculate false positive rate and false negative rate

In [12]:
# Try out error_types on case 1, reading the saved permutation-testing
# results that match "test1_results_repeat_*". The returned dictionary is
# displayed as the cell output.

error_types(
    subset,
    file_names="test1_results_repeat_*",
    n_repeats=100,
    case=1,
)

{'True positives': array([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0.,

In [13]:
# Try out error_types on case 2, reading the saved permutation-testing
# results that match "test2_results_repeat_*". The returned dictionary is
# displayed as the cell output.

error_types(
    subset,
    file_names="test2_results_repeat_*",
    n_repeats=100,
    case=2,
)

{'True positives': array([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 1., 0.],
        [0., 0., 0., 0., 1., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0.,

In [14]:
# Try out error_types on case 3, reading the saved permutation-testing
# results that match "test3_results_repeat_*". The returned dictionary is
# displayed as the cell output.

error_types(
    subset,
    file_names="test3_results_repeat_*",
    n_repeats=100,
    case=3,
)

{'True positives': array([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0.,

In [16]:
# Try out error_types on case 4, reading the saved permutation-testing
# results that match "test4_results_repeat_*". The returned dictionary is
# displayed as the cell output.

error_types(
    subset,
    file_names="test4_results_repeat_*",
    n_repeats=100,
    case=4,
)

{'True positives': array([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0.,