# 1. Setup

In [1]:
# Import libraries and all new functions

import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import re
from glob import glob
import os
from new_functions_py_file import *

In [2]:
# Load the two CSV inputs (first row used as the header, first column as the
# index) and pass each one straight through its cleaning helper.

adjacency_matrix = clean_adjacency_mat(
    pd.read_csv("adjacency_matrix2.csv", header=0, index_col=0)
)

# The multilevel table is cleaned against the already-cleaned adjacency matrix.
multilevel = clean_multilevel(
    pd.read_csv("multilevel2.csv", header=0, index_col=0),
    adjacency_matrix,
)

In [3]:
# Build the subset adjacency matrix from the entries named in
# subset_leaf_list, using the cleaned adjacency matrix and multilevel table.

subset_leaf_list = [
    "Amyg_L_73_1",
    "Hippo_L_75_1",
]
subset = subset_matrix_creator(
    subset_leaf_list,
    adjacency_matrix,
    multilevel,
)

In [4]:
# Derive the descendants and ancestors matrices from the subset matrix.
# Both helpers receive the same N and mu settings.

descendants = adjacency_descendants(
    subset,
    N=20,
    mu=3.0,
)
ancestors = adjacency_ancestors(
    subset,
    N=20,
    mu=3.0,
)

# 2. Generate simulated data

In [5]:
# Seed NumPy's global RNG once, then generate the simulated data for cases
# 1-4 in order. Every call uses the same settings; only the case number and
# the matching "test<case>_data.npz" filename change.
np.random.seed(5)
for case_id in (1, 2, 3, 4):
    generate_simulated_data(
        filename="test{}_data.npz".format(case_id),
        subset=subset,
        case=case_id,
        n_repeats=100,
        N=20,
        mu=3.0,
    )

In [6]:
# Check the test 1 data files that were created (100 repeats -> 100 .npz files)
# by printing the X, Z and G arrays stored in the first five of them.

test1_filenames = sorted(glob("test1_data_repeat_*"))

# Slicing (instead of indexing positions 0..4) means the cell simply prints
# fewer files, rather than raising IndexError, when fewer than five exist.
for test1_filename in test1_filenames[:5]:
    # np.load on an .npz file returns an archive object that keeps the file
    # open; the with-block closes it once its arrays have been printed.
    with np.load(test1_filename) as test:
        print("\033[1m" + test1_filename + "\033[0m")
        for key in ("X", "Z", "G"):
            # Bold heading (ANSI escape codes) followed by the stored array.
            print("\033[1m" + key + "\033[0m")
            print(test[key])
    print("\n")

[1mtest1_data_repeat_000000.npz[0m
[1mX[0m
[[ 0.44122749 -0.33087015]
 [ 2.43077119 -0.25209213]
 [ 0.10960984  1.58248112]
 [-0.9092324  -0.59163666]
 [ 0.18760323 -0.32986996]
 [-1.19276461 -0.20487651]
 [-0.35882895  0.6034716 ]
 [-1.66478853 -0.70017904]
 [ 1.15139101  1.85733101]
 [-1.51117956  0.64484751]
 [-0.98060789 -0.85685315]
 [-0.87187918 -0.42250793]
 [ 0.99643983  0.71242127]
 [ 0.05914424 -0.36331088]
 [ 0.00328884 -0.10593044]
 [ 0.79305332 -0.63157163]
 [-0.00619491 -0.10106761]
 [-0.05230815  0.24921766]
 [ 0.19766009  1.33484857]
 [-0.08687561  1.56153229]]
[1mZ[0m
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0

# 3. Conduct permutation testing

In [7]:
# Permutation testing: one call per simulated test (1-4). All calls share the
# same settings except the initial probability and the clip value, which are
# listed per test in the table below.
# NOTE(review): the output saved with this cell is
# "ValueError: operands could not be broadcast together with shapes (10,2) (10,8)".
# Presumably it is raised inside permutation_testing (not visible here) - confirm.
permutation_settings = (
    # (test number, initial_prob, clip)
    (1, 0.5, 0.001),
    (2, 0.5, 0.0001),
    (3, 0.25, 0.01),
    (4, 0.75, 0.001),
)
for test_id, start_prob, clip_value in permutation_settings:
    permutation_testing(
        filename_old="test{}_data_repeat_*".format(test_id),
        filename_new="test{}_results.npz".format(test_id),
        subset=subset,
        n_repeats=100,
        nperm=10,
        N=20,
        mu=3.0,
        mu0=0.0,
        mu1=3.0,
        sigma2=1.0,
        niter=5,
        initial_prob=start_prob,
        clip=clip_value,
    )

ValueError: operands could not be broadcast together with shapes (10,2) (10,8) 

In [None]:
# Check the test 1 permutation-testing result files (names matching
# "test1_results_repeat_*") by printing every array stored in each of them.
# NOTE(review): presumably one result file per repeat - confirm against
# permutation_testing.

test1_filenames = sorted(glob("test1_results_repeat_*"))

# (bold heading to print, key of the array inside the .npz archive)
result_sections = (
    ("p-values:", "pval"),
    ("names:", "names"),
    ("posterior:", "posterior"),
    ("strings:", "strings"),
)

for result_filename in test1_filenames:
    # np.load returns a dictionary-like archive that keeps the file open;
    # the with-block closes it once its contents have been printed.
    with np.load(result_filename) as test:
        print("\033[1m" + result_filename + "\033[0m")

        # Print the values corresponding to each key
        for heading, key in result_sections:
            print("\033[1m" + heading + "\033[0m")
            print(test[key])
    print("\n")