In [1]:
import sys
import os
import shutil
import numpy as np
from replicate import one_intro, two_intro, NUM_SIMS

In [2]:
# Index of the one-intro run to verify; must satisfy 0 <= i < NUM_SIMS
# (the original note gives the range as 0..109999).
i = 12345
# A negative i would silently read the wrong TSV line and index the seed list
# from its end, so out-of-range values are rejected up front.
if not 0 <= i < NUM_SIMS:
    raise ValueError(f"i must be between 0 and {NUM_SIMS - 1}, got {i}")

# Read the reported result for run i
tsv_file = "main_one_intro_output.tsv"
with open(tsv_file, "r", encoding="utf-8") as f:
    # Skip the first i+1 lines
    # (presumably one header line plus the rows for runs 0..i-1 -- TODO confirm against the TSV)
    for _ in range(i + 1):
        next(f, None)
    # Read the (i+2)-th line; next() returns None if the file ends before it
    line = next(f, None)
if line is None:
    raise ValueError(f"{tsv_file} ended before the row for run {i}")

# Split the line by tabs and take columns 2 through 5 (1-based indexing => [1:5])
columns = line.strip().split('\t')
if len(columns) < 5:
    # Fail loudly: a short row would otherwise make the comparison loop skip values silently
    raise ValueError(
        f"Row for run {i} in {tsv_file} has {len(columns)} tab-separated columns, expected at least 5"
    )
expected_values = columns[1:5]  # columns[1], columns[2], columns[3], columns[4]
    
# Reproduce replicate's SeedSequence list: the root sequence spawns one child
# per experiment type, and the one-intro child spawns one sequence per run.
seed = 42
one_intro_seedseq, two_intro_seedseq = np.random.SeedSequence(seed).spawn(2)
seedseq_list = one_intro_seedseq.spawn(NUM_SIMS)
# Get the i-th seed from the list
selected_seedseq = seedseq_list[i]

verification_dir  = "main_verifications"
parameters_file_name = "main_parameters.txt"
os.makedirs(verification_dir, exist_ok=True)
shutil.copy2(parameters_file_name, os.path.join(verification_dir, parameters_file_name))

# one_intro is invoked from inside verification_dir so that the relative output
# directory and the copied parameters file both resolve there.
original_cwd = os.getcwd()
os.chdir(verification_dir)
try:
    # set output directory
    output_dir = os.path.join("one_intro", str(i))
    # Run the single-introduction simulation using the i-th SeedSequence
    one_intro(output_dir, parameters_file_name, selected_seedseq)
finally:
    # Always restore the working directory, even if the simulation raises;
    # otherwise re-running this cell would start from inside verification_dir.
    os.chdir(original_cwd)
  
# Locate the regenerated output files of one-intro run i
dir_name = os.path.join(verification_dir, "one_intro", str(i))
failures_path, ab_path, cc_path, tree_hash_path = (
    os.path.join(dir_name, output_name)
    for output_name in ("failures.txt", "AB.txt", "CC.txt", "final_tree_hash.txt")
)
# Read the content of the four files
# Each file's content is stripped of leading and trailing whitespace (including newlines)
def read_file(path):
    """Return the text of ``path`` with leading and trailing whitespace removed.

    The file is read as UTF-8. If ``path`` is not an existing regular file,
    a message naming it is printed and ``sys.exit(1)`` is called.
    """
    if os.path.isfile(path):
        with open(path, "r", encoding="utf-8") as handle:
            contents = handle.read()
        return contents.strip()
    print(f"File {path} does not exist.")
    sys.exit(1)
    
# Load the regenerated values. Only the first whitespace-separated token of
# failures.txt is compared; the other files are compared in full.
failures_val = read_file(failures_path)
failures_val = failures_val.split()[0]
ab_val       = read_file(ab_path)
cc_val       = read_file(cc_path)
tree_hash_val = read_file(tree_hash_path)

# Compare each file's contents to its corresponding expected value
file_values = [failures_val, ab_val, cc_val, tree_hash_val]
file_names  = ["failures.txt", "AB.txt", "CC.txt", "final_tree_hash.txt"]

print(f"Verifying one-intro run {i} by regenerating results and comparing them to those reported in main_one_intro_output.tsv")
# Iterate the three lists in lockstep instead of enumerating with `i`:
# reusing `i` as the loop index would overwrite the run index chosen above.
for file_name, file_val, expected_val in zip(file_names, file_values, expected_values):
    match_status = "MATCH" if file_val == expected_val else "MISMATCH"
    print(f"{file_name} => Reported: '{expected_val}' | Regenerated: '{file_val}' | {match_status}")


Verifying one-intro run 12345 by regenerating results and comparing them to those reported in main_one_intro_output.tsv
failures.txt => Reported: '4' | Regenerated: '4' | MATCH
AB.txt => Reported: '0' | Regenerated: '0' | MATCH
CC.txt => Reported: '0' | Regenerated: '0' | MATCH
final_tree_hash.txt => Reported: '9f897f52cecf15bf4b1c139835918ad02e3a851fab9db69a640ac38a77655807' | Regenerated: '9f897f52cecf15bf4b1c139835918ad02e3a851fab9db69a640ac38a77655807' | MATCH


In [None]:
# Index of the two-intro pair to verify; must satisfy 0 <= i < NUM_SIMS
# (the original note gives the range as 0..109999).
i = 6
# A negative i would silently read the wrong TSV line and index the seed list
# from its end, so out-of-range values are rejected up front.
if not 0 <= i < NUM_SIMS:
    raise ValueError(f"i must be between 0 and {NUM_SIMS - 1}, got {i}")

# Read the reported result for pair i
tsv_file = "main_two_intro_output.tsv"
with open(tsv_file, "r", encoding="utf-8") as f:
    # Skip the first i+1 lines
    # (presumably one header line plus the rows for pairs 0..i-1 -- TODO confirm against the TSV)
    for _ in range(i + 1):
        next(f, None)
    # Read the (i+2)-th line; next() returns None if the file ends before it
    line = next(f, None)
if line is None:
    raise ValueError(f"{tsv_file} ended before the row for pair {i}")

# Split the line by tabs and take columns 2 through 4 (1-based indexing => [1:4])
columns = line.strip().split('\t')
if len(columns) < 4:
    # Fail loudly: a short row would otherwise make the comparison loop skip values silently
    raise ValueError(
        f"Row for pair {i} in {tsv_file} has {len(columns)} tab-separated columns, expected at least 4"
    )
expected_values = columns[1:4]  # columns[1], columns[2], columns[3]
    
# Reproduce the main script's seed logic
seed = 42
# spawn(2) yields two child SeedSequence objects
one_intro_seedseq, two_intro_seedseq = np.random.SeedSequence(seed).spawn(2)
# Then spawn NUM_SIMS seeds for one_intro:
one_intro_seedseq_list = one_intro_seedseq.spawn(NUM_SIMS)
# And NUM_SIMS seeds for two_intro:
two_intro_seedseq_list = two_intro_seedseq.spawn(NUM_SIMS)
# Create the RNG for the i-th two_intro run
rng = np.random.default_rng(two_intro_seedseq_list[i])

verification_dir  = "main_verifications"
parameters_file_name = "main_parameters.txt"
os.makedirs(verification_dir, exist_ok=True)
shutil.copy2(parameters_file_name, os.path.join(verification_dir, parameters_file_name))

# The simulations are invoked from inside verification_dir so that the relative
# output directories and the copied parameters file all resolve there.
original_cwd = os.getcwd()
os.chdir(verification_dir)
try:
    # 1) Pick two single-intro runs at random and run them first.
    #    The two draws are the first values taken from rng; two_intro receives
    #    the same generator afterwards, so this order must not change.
    one_intro_runs = []
    for _ in range(2):
        # Randomly pick an index from 0..(NUM_SIMS-1)
        chosen_idx = rng.integers(NUM_SIMS)
        one_intro_runs.append(chosen_idx)

        # Output directory for the single-intro run
        single_intro_outdir = os.path.join("one_intro", str(chosen_idx))

        # Run the single-intro simulation with that run's own SeedSequence
        one_intro(
            single_intro_outdir,
            parameters_file_name,
            one_intro_seedseq_list[chosen_idx]
        )

    # 2) Now combine those two single-intro simulations using two_intro
    two_intro_outdir = os.path.join("two_intro", str(i))
    os.makedirs(two_intro_outdir, exist_ok=True)

    # Call two_intro with the same rng
    two_intro(
        two_intro_outdir,
        parameters_file_name,
        rng,
        one_intro_runs[0],
        one_intro_runs[1]
    )
finally:
    # Always restore the working directory, even if a simulation raises;
    # otherwise re-running this cell would start from inside verification_dir.
    os.chdir(original_cwd)
  
# Locate the regenerated output files of two-intro pair i
dir_name = os.path.join(verification_dir,'two_intro',str(i))
ab_path = os.path.join(dir_name, "AB_array.csv")
cc_path = os.path.join(dir_name, "CC_array.csv")
tree_hash_path = os.path.join(dir_name, "final_tree_hash.txt")

# Read the arrays. ndmin=2 keeps a single-row or single-column CSV two-dimensional;
# without it loadtxt squeezes the array to 1-D and the (row, col) unpacking below fails.
AB = np.loadtxt(ab_path, delimiter=",", ndmin=2)
CC = np.loadtxt(cc_path, delimiter=",", ndmin=2)

# Encode the positions of the non-zero entries as "(row,col),(row,col),..."
# so they can be compared as strings with the reported TSV columns.
AB_nonzero = np.nonzero(AB)
CC_nonzero = np.nonzero(CC)
ab_val = ",".join(f"({r},{c})" for r,c in zip(*AB_nonzero))
cc_val = ",".join(f"({r},{c})" for r,c in zip(*CC_nonzero))

# Only the first line of the hash file is used
with open(tree_hash_path, 'r', encoding="utf-8") as f:
    tree_hash_val = f.readline().strip()

# Compare each regenerated value to its corresponding reported value
file_values = [ab_val, cc_val, tree_hash_val]
file_names  = ["AB_array.csv", "CC_array.csv", "final_tree_hash.txt"]

print(f"Verifying two-intro pair {i} by regenerating results and comparing them to those reported in main_two_intro_output.tsv")
# Iterate the three lists in lockstep instead of enumerating with `i`:
# reusing `i` as the loop index would overwrite the pair index chosen above.
for file_name, file_val, expected_val in zip(file_names, file_values, expected_values):
    match_status = "MATCH" if file_val == expected_val else "MISMATCH"
    print(f"{file_name} => Reported: '{expected_val}' | Regenerated: '{file_val}' | {match_status}")
