In [1]:
from physhapes.helper_functions import *
from physhapes.mcmc import * 
import os 
import numpy as np
import subprocess


In [2]:
def create_mcmc_screen_script(experiment_path=None, datapath=None, phylo_path=None, num_chains=3, 
                              num_samples=3000, dt=0.05, lambd=0.7, obs_var=0.001, rb=2,
                              prior_sigma_min=0.0, prior_sigma_max=2.5,
                              prior_alpha_min=0.0, prior_alpha_max=0.03,
                              proposal_sigma_tau=0.2, proposal_alpha_tau=0.005,
                              n=20, d=2, proc=False, super_root="phylomean"):
    """Creates a bash script to run MCMC chains with reduced priority."""
    
    script_name = f'screen_{sub_id}.sh'
    if proc:
        output_path = f"{experiment_path}/mcmc_procrustes/id={sub_id}"
    else:
        output_path = f"{experiment_path}/mcmc/id={sub_id}"

    with open(script_name, 'w') as rsh:
        rsh.write(f'''#!/bin/bash
# Make output directory
mkdir -p {output_path}

# Add python script with environment variables directly in file
cat > {output_path}/run_with_limits.py << 'EOF'
import os
# Force environment variables BEFORE importing JAX
os.environ["XLA_FLAGS"] = "--xla_cpu_multi_thread_eigen=false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1" 
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["JAX_PLATFORM_NAME"] = "cpu"
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "0.1"
# Import normal modules and call script
import sys
import subprocess
subprocess.run([sys.executable, "run_mcmc.py"] + sys.argv[1:])
EOF

# Run chains with reduced priority
for i in $(seq 1 {num_chains}); do
  # Use nice to lower priority (-n 19 is lowest)
  screen -md -S {sub_id}_chain=$i bash -c '
    nice -n 19 python {output_path}/run_with_limits.py \\
    --outputpath {output_path} --phylopath {phylo_path} \\
    --datapath {datapath} --prior_sigma_min {prior_sigma_min} \\
    --prior_sigma_max {prior_sigma_max} --prior_alpha_min {prior_alpha_min} \\
    --prior_alpha_max {prior_alpha_max} --proposal_sigma_tau {proposal_sigma_tau} \\
    --proposal_alpha_tau {proposal_alpha_tau} --rb {rb} --obs_var {obs_var} \\
    --lambd {lambd} --dt {dt} --n {n} --d {d} --N {num_samples} \\
    --super_root {super_root} --use_wandb True --wandb_project "SPMS_MCMC"
  '
  echo "Started chain $i with reduced priority"
  sleep 10
done
''')
    
    # Make script executable
    import os
    os.chmod(script_name, 0o755)
    
    print(f"Created MCMC screen script: {script_name}")
    return script_name



In [None]:
def run_mcmc_for_all_folders(experiment_path="sigma=0.6_alpha=0.025_dt=0.05", 
                            phylo_path="../data/chazot_subtree_rounded.nw",
                            num_chains=3, 
                            num_samples=3000, 
                            dt=0.05, 
                            lambd=0.95, 
                            obs_var=0.0001, 
                            rb=1,
                            prior_sigma_min=0.0,
                            prior_sigma_max=1.0,
                            prior_alpha_min=0.0,
                            prior_alpha_max=0.05,
                            proposal_sigma_tau=0.1,
                            proposal_alpha_tau=0.005,
                            n=20, 
                            d=2, 
                            super_root="phylomean"):
    """
    Run MCMC for all seed folders in an experiment directory, 
    with both procrustes and non-procrustes alignments.
    """
    import os
    import numpy as np
    import subprocess
    import time
    
    # Get all seed folders in the experiment directory
    seed_folders = [f for f in os.listdir(experiment_path) if f.startswith("seed=")]
    print(f"Found {len(seed_folders)} seed folders in {experiment_path}")
    
    # Process each seed folder
    for i, seed_folder in enumerate(seed_folders):
        folder_path = f"{experiment_path}/{seed_folder}"
        
        # Check if necessary files exist
        if not os.path.isfile(f"{folder_path}/leaves.csv"):
            print(f"Skipping {folder_path}: leaves.csv not found")
            continue
            
        procrustes_path = f"{folder_path}/procrustes_aligned.csv"
        has_procrustes = os.path.isfile(procrustes_path)
        
        print(f"\nProcessing folder {i+1}/{len(seed_folders)}: {seed_folder}")
        
        # 1. Submit non-procrustes MCMC
        print("Submitting non-procrustes MCMC...")
        #non_proc_sub_id = str(np.random.randint(0, 1000_000_000))
        
        script = create_mcmc_screen_script(
            experiment_path=folder_path,
            datapath=f"{folder_path}/leaves.csv",
            phylo_path=phylo_path,
            num_chains=num_chains,
            num_samples=num_samples,
            dt=dt,
            lambd=lambd[1],
            obs_var=obs_var,
            rb=rb,
            prior_sigma_min=prior_sigma_min,
            prior_sigma_max=prior_sigma_max,
            prior_alpha_min=prior_alpha_min,
            prior_alpha_max=prior_alpha_max,
            proposal_sigma_tau=proposal_sigma_tau[0],
            proposal_alpha_tau=proposal_alpha_tau[0],
            n=n,
            d=d,
            proc=False,
            super_root=super_root
        )
        
        # Run the script
        subprocess.run(f"bash {script}", shell=True)
        
        # Wait between submissions to allow system to stabilize
        time.sleep(30)
        
        # 2. Submit procrustes MCMC if available
        if has_procrustes:
            print("Submitting procrustes MCMC...")
            #proc_sub_id = str(np.random.randint(0, 1000_000_000))
            
            script = create_mcmc_screen_script(
                experiment_path=folder_path,
                datapath=procrustes_path,
                phylo_path=phylo_path,
                num_chains=num_chains,
                num_samples=num_samples,
                dt=dt,
                lambd=lambd[1],
                obs_var=obs_var,
                rb=rb,
                prior_sigma_min=prior_sigma_min,
                prior_sigma_max=prior_sigma_max,
                prior_alpha_min=prior_alpha_min,
                prior_alpha_max=prior_alpha_max,
                proposal_sigma_tau=proposal_sigma_tau[1],
                proposal_alpha_tau=proposal_alpha_tau[1],
                n=n,
                d=d,
                proc=True,
                super_root=super_root
            )
            
            # Run the script
            subprocess.run(f"bash {script}", shell=True)
        else:
            print(f"Skipping procrustes MCMC for {folder_path}: procrustes_aligned.csv not found")
        
        # Wait longer between folders to ensure system resources are available
        print(f"Waiting 60 seconds before processing next folder...")
        time.sleep(60)
    
    print(f"\nAll {len(seed_folders)} folders processed!")
    return True

In [4]:
# MCMC settings
num_chains = 3
num_samples = 3000
dt = 0.05
lambd = [0.95, 0.95]
obs_var = 0.0001
rb = 1
prior_sigma_min = 0.0 
prior_sigma_max = 1.0
prior_alpha_min = 0.0
prior_alpha_max = 0.03
proposal_sigma_tau = [0.1, 0.1] # first parameter for notn-procrustes, second for procrustes
proposal_alpha_tau = [0.005, 0.005] 
#seed_start = np.random.randint(0,1000_000_000)
n = 20
d = 2
super_root = "phylomean"  # or "random"

# high level names 
procrustes = True
experiment_path = "unit_root/sigma=0.3_alpha=0.005_dt=0.05/beatty-050925"
phylo_path = "../data/chazot_subtree_rounded.nw"
sub_id = str(np.random.randint(0, 1000_000_000))


In [5]:
os.listdir(experiment_path)

['seed=869083029',
 'seed=1397427006',
 'seed=1267895950',
 'seed=1063451080',
 'seed=1428004241',
 'seed=754028303',
 'seed=4205842640',
 'seed=243078351',
 'seed=445806869',
 'seed=850934165',
 'seed=216617605',
 'seed=4163767504']

In [6]:
# Add this cell to your notebook and run it
result = run_mcmc_for_all_folders(
    experiment_path=experiment_path,
    phylo_path=phylo_path,
    num_chains=num_chains,
    num_samples=num_samples,
    dt=dt,
    lambd=lambd,
    obs_var=obs_var,
    rb=rb,
    prior_sigma_min=prior_sigma_min,
    prior_sigma_max=prior_sigma_max,
    prior_alpha_min=prior_alpha_min,
    prior_alpha_max=prior_alpha_max,
    proposal_sigma_tau=proposal_sigma_tau,
    proposal_alpha_tau=proposal_alpha_tau,
    n=n,
    d=d,
    super_root=super_root
)

Found 12 seed folders in unit_root/sigma=0.3_alpha=0.005_dt=0.05/beatty-050925

Processing folder 1/12: seed=869083029
Submitting non-procrustes MCMC...
Created MCMC screen script: screen_595137635.sh
Started chain 1 with reduced priority
Started chain 2 with reduced priority
Started chain 3 with reduced priority
Submitting procrustes MCMC...
Created MCMC screen script: screen_595137635.sh
Started chain 1 with reduced priority
Started chain 2 with reduced priority
Started chain 3 with reduced priority
Waiting 60 seconds before processing next folder...

Processing folder 2/12: seed=1397427006
Submitting non-procrustes MCMC...
Created MCMC screen script: screen_595137635.sh
Started chain 1 with reduced priority
Started chain 2 with reduced priority
Started chain 3 with reduced priority
Submitting procrustes MCMC...
Created MCMC screen script: screen_595137635.sh
Started chain 1 with reduced priority
Started chain 2 with reduced priority
Started chain 3 with reduced priority
Waiting 60 s

In [7]:
# Add a cell to execute the script
#script = create_mcmc_screen_script(experiment_path=experiment_path+"/"+simseed,
#    datapath=datapath,
#    phylo_path=phylo_path,
#    num_chains=num_chains,
#    num_samples=num_samples,
#    dt=dt,
#    lambd=lambd,
#    obs_var=obs_var,
#    rb=rb,
#    prior_sigma_min=prior_sigma_min,
#    prior_sigma_max=prior_sigma_max,
#    prior_alpha_min=prior_alpha_min,
#    prior_alpha_max=prior_alpha_max,
#    proposal_sigma_tau=proposal_sigma_tau,
#    proposal_alpha_tau=proposal_alpha_tau,
#    n=n,
#    d=d, 
#    proc=procrustes,
#    super_root=super_root)  # Fill in your parameters

#subprocess.run(f"bash {script}", shell=True)

In [8]:
def create_mcmc_screen_script_old(experiment_path=None, datapath=None, phylo_path=None, num_chains=3, 
                              num_samples=3000, dt=0.05, lambd=0.7, obs_var=0.001, rb=2,
                              prior_sigma_min=0.0, prior_sigma_max=2.5,
                              prior_alpha_min=0.0, prior_alpha_max=0.03,
                              proposal_sigma_tau=0.2, proposal_alpha_tau=0.005,
                              n=20, d=2, proc=False, super_root="phylomean"):
    """Creates a bash script to run MCMC chains in separate screen sessions."""
    
    script_name = f'screen_{sub_id}.sh'
    if proc:
        output_path = f"{experiment_path}/mcmc_procrustes/id={sub_id}"
    else:
        output_path = f"{experiment_path}/mcmc/id={sub_id}"

    with open(script_name, 'w') as rsh:
        rsh.write(f'''#!/bin/bash
# Set resource limits to control CPU usage
export XLA_PYTHON_CLIENT_MEM_FRACTION=0.2
export JAX_PLATFORM_NAME=cpu
export XLA_FLAGS="--xla_cpu_multi_thread_eigen=false"
export JAX_DISABLE_JIT=0
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1

# Make output directory
mkdir -p {output_path}
for i in $(seq 1 {num_chains}); do
 # screen -md -S {sub_id}_chain=$i python run_mcmc.py --outputpath {output_path} --phylopath {phylo_path} --datapath {datapath} --prior_sigma_min {prior_sigma_min} --prior_sigma_max {prior_sigma_max} --prior_alpha_min {prior_alpha_min} --prior_alpha_max {prior_alpha_max} --proposal_sigma_tau {proposal_sigma_tau} --proposal_alpha_tau {proposal_alpha_tau} --rb {rb} --obs_var {obs_var} --lambd {lambd} --dt {dt} --n {n} --d {d} --N {num_samples} --super_root {super_root} --use_wandb True --wandb_project "SPMS_MCMC"
   # Pass environment variables directly to the command in screen
 screen -md -S {sub_id}_chain=$i bash -c 'XLA_PYTHON_CLIENT_MEM_FRACTION=0.2 \\
    JAX_PLATFORM_NAME=cpu \\
    XLA_FLAGS="--xla_cpu_multi_thread_eigen=false" \\
    OMP_NUM_THREADS=1 \\
    MKL_NUM_THREADS=1 \\
    NUMEXPR_NUM_THREADS=1 \\
    python run_mcmc.py --outputpath {output_path} --phylopath {phylo_path} \\
    --datapath {datapath} --prior_sigma_min {prior_sigma_min} \\
    --prior_sigma_max {prior_sigma_max} --prior_alpha_min {prior_alpha_min} \\
    --prior_alpha_max {prior_alpha_max} --proposal_sigma_tau {proposal_sigma_tau} \\
    --proposal_alpha_tau {proposal_alpha_tau} --rb {rb} --obs_var {obs_var} \\
    --lambd {lambd} --dt {dt} --n {n} --d {d} --N {num_samples} \\
    --super_root {super_root} --use_wandb True --wandb_project "SPMS_MCMC"'
    
  echo "Started chain $i"
  sleep 10  # Add delay between starting chains
done
done
''')
    
    # Make script executable
    import os
    os.chmod(script_name, 0o755)
    
    print(f"Created MCMC screen script: {script_name}")
    return script_name