In [1]:
import os
import subprocess
from glob import glob
from concurrent.futures import ThreadPoolExecutor

In [None]:
def iqtree_cmd(fasta_path):
    """Build the IQ-TREE argument list for a single alignment.

    Args:
        fasta_path: Path of the alignment file, passed to IQ-TREE via ``-s``.

    Returns:
        list: The ``iqtree`` executable name followed by its arguments, in a
        form that can be handed to ``subprocess.run`` without a shell.
    """
    alignment_args = ["-s", fasta_path]
    model_args = ["-m", "HKY+F+I+G4"]  # fixed substitution model
    bootstrap_args = ["-B", "1000"]  # 1000 ultrafast bootstrap replicates
    # --polytomy collapses near-zero branches; --redo overwrites old results.
    flags = ["--polytomy", "--redo"]
    return ["iqtree", *alignment_args, *model_args, *bootstrap_args, *flags]


def run_iqtree(fasta_file):
    """Run IQ-TREE on one alignment and report the outcome.

    The command is built by ``iqtree_cmd`` and executed without a shell, with
    stdout and stderr captured as text. Progress and errors are printed.

    Args:
        fasta_file: Path of the alignment file to analyse.

    Returns:
        bool: True when IQ-TREE exited with status 0; False when it exited
        with a non-zero status or could not be started at all.
    """
    cmd = iqtree_cmd(fasta_file)
    # map(str, ...) keeps the log line working for path-like inputs.
    print(f"Running IQ-TREE for {fasta_file} with command: {' '.join(map(str, cmd))}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # E.g. the iqtree executable is not on PATH. Inside a worker thread
        # this exception would otherwise be stored on the future and never
        # shown, so report it the same way as a failed run.
        print(f"Error processing {fasta_file}: {exc}")
        return False

    succeeded = result.returncode == 0
    if succeeded:
        print(f"Successfully processed {fasta_file}")
    else:
        print(f"Error processing {fasta_file}: {result.stderr}")
    return succeeded

In [None]:
# Every ".fas" alignment in the loci directory becomes one IQ-TREE job.
# os.listdir() returns names in arbitrary order, so sort the paths to keep
# the preview below and the job submission order reproducible between runs.
base_dir = "../data/phylo/loci/"
fasta_files = sorted(
    os.path.join(base_dir, f)
    for f in os.listdir(base_dir)
    if f.endswith(".fas")
)
fasta_files[:5]


In [None]:
# Leave five cores free for the rest of the system, but never go below one
# worker: os.cpu_count() may return None, and ThreadPoolExecutor raises
# ValueError when max_workers is <= 0 (machines with five or fewer cores).
threads = max(1, (os.cpu_count() or 1) - 5)
with ThreadPoolExecutor(max_workers=threads) as executor:
    futures = [executor.submit(run_iqtree, fasta_file) for fasta_file in fasta_files]

# An exception raised inside a worker is stored on its future and stays
# invisible unless the future is inspected. All futures are finished once the
# "with" block exits, so exception() returns immediately here.
for fasta_file, future in zip(fasta_files, futures):
    error = future.exception()
    if error is not None:
        print(f"IQ-TREE run for {fasta_file} raised {error!r}")

In [2]:
# Collect the first line of every per-locus .treefile into a single file,
# one tree per line. Sorting the glob result makes the tree order
# reproducible, and writing an explicit newline keeps consecutive trees on
# separate lines even when a .treefile does not end with a newline.
with open("../data/phylo/genetrees.nwk", "w") as concat:
    for genetree_path in sorted(glob("../data/phylo/loci/*.treefile")):
        with open(genetree_path, "r") as genetree:
            newick = genetree.readline().strip()
        if newick:  # an empty .treefile would otherwise add a blank line
            concat.write(newick + "\n")