In [1]:
import os
import numpy as np
import tskit
import csv

# ─────────────────────────────────────────────────────────────────────────────
# Helper: jitter node times within each generation (skip samples)
# ─────────────────────────────────────────────────────────────────────────────
def jitter_within_generations(ts: tskit.TreeSequence, eps: float = 1e-9) -> tskit.TreeSequence:
    """
    Return a copy of ``ts`` in which non-sample nodes that share a time are
    given distinct times.

    Non-sample nodes with identical times are ranked by node ID; the node with
    rank ``k`` is moved from ``t`` to ``t + step * k``, so the lowest-ID node
    of every tie keeps its original time. Nodes listed by ``ts.samples()`` are
    never moved, whatever their time. The result is deterministic.

    Args:
        ts: Tree sequence to jitter. The edit is applied to a dumped copy of
            its tables.
        eps: Requested spacing between consecutive tied nodes. A positive
            ``eps`` that is smaller than two floating-point steps at ``t``
            would be (partly) rounded away and leave nodes tied, so in that
            case the spacing is raised to two floating-point steps at ``t``.
            A zero or negative ``eps`` is used exactly as given.

    Returns:
        A new tree sequence built from the edited, re-sorted tables.
    """
    tables = ts.dump_tables()  # editable copy
    times = tables.nodes.time.astype(float)  # astype() returns a fresh array

    # Samples are excluded from the jitter so they stay at their original time.
    is_sample = np.zeros(times.shape[0], dtype=bool)
    is_sample[ts.samples()] = True

    movable = np.flatnonzero(~is_sample)  # non-sample node IDs, ascending
    if movable.size > 1:
        # A stable sort by time places tied nodes next to each other while
        # keeping ascending node-ID order inside each tie.
        order = np.argsort(times[movable], kind="stable")
        node_ids = movable[order]
        base = times[node_ids]

        # Rank of each node inside its run of equal times (0 for the first).
        starts_run = np.ones(node_ids.size, dtype=bool)
        starts_run[1:] = base[1:] != base[:-1]
        run_start = np.flatnonzero(starts_run)
        run_of = np.cumsum(starts_run) - 1
        rank = np.arange(node_ids.size) - run_start[run_of]

        # Rank-0 nodes (including every untied node) are left untouched.
        moved = rank > 0
        if moved.any():
            tied_time = base[moved]
            step = np.full(tied_time.shape, eps, dtype=float)
            if eps > 0:
                # Guard against eps being absorbed by rounding at large |t|.
                step = np.maximum(step, 2.0 * np.spacing(np.abs(tied_time)))
            times[node_ids[moved]] = tied_time + step * rank[moved]

    # Write the new times back (all other node columns unchanged) and re-sort
    # so the tables satisfy tskit's ordering requirements again.
    tables.nodes.set_columns(
        flags=tables.nodes.flags,
        time=times,
        population=tables.nodes.population,
        individual=tables.nodes.individual,
        metadata=tables.nodes.metadata,
        metadata_offset=tables.nodes.metadata_offset,
    )
    tables.sort()
    return tables.tree_sequence()

# ─────────────────────────────────────────────────────────────────────────────
# Main conversion script: export .tree (Newick) + .csv (breakpoints)
# ─────────────────────────────────────────────────────────────────────────────
# Convert every "r*.trees" file found in `input_dir` into two files written
# to `output_dir`:
#   <stem>.tree        one Newick string per line, one line per tree
#                      yielded by ts.trees()
#   <stem>_breaks.csv  one row per tree with its interval (left, right, mid)
# Line i of the .tree file and the CSV row with tree_index i describe the
# same tree, because both are written in the same pass over ts.trees().
input_dir = "trees"
output_dir = "trees_newick"
os.makedirs(output_dir, exist_ok=True)

for filename in sorted(os.listdir(input_dir)):
    # Skip anything that is not named like "r3000001.trees".
    if not (filename.endswith(".trees") and filename.startswith("r")):
        continue

    input_path = os.path.join(input_dir, filename)
    stem = os.path.splitext(filename)[0]  # e.g. "r3000001"

    # Output files (.tree + .csv)
    output_tree_path = os.path.join(output_dir, f"{stem}.tree")
    output_csv_path = os.path.join(output_dir, f"{stem}_breaks.csv")

    # Load the tree sequence and break ties between non-sample node times
    # before exporting.
    ts = tskit.load(input_path)
    ts = jitter_within_generations(ts, eps=1e-9)

    # Single traversal of the trees feeds both outputs, instead of iterating
    # ts.trees() once per output file.
    with open(output_tree_path, "w") as f:
        with open(output_csv_path, "w", newline="") as cf:
            writer = csv.writer(cf)
            writer.writerow(["file", "tree_index", "left", "right", "mid"])
            for i, tree in enumerate(ts.trees()):
                f.write(tree.newick() + "\n")
                left, right = tree.interval.left, tree.interval.right
                mid = 0.5 * (left + right)
                writer.writerow([stem, i, left, right, mid])

    print(f"✅ {filename} → {output_tree_path}, {output_csv_path}")

print("🎉 All tree files converted successfully with jitter + interval CSVs!")

✅ r3000000.trees → trees_newick/r3000000.tree, trees_newick/r3000000_breaks.csv
✅ r3000001.trees → trees_newick/r3000001.tree, trees_newick/r3000001_breaks.csv
✅ r3000002.trees → trees_newick/r3000002.tree, trees_newick/r3000002_breaks.csv
✅ r3000003.trees → trees_newick/r3000003.tree, trees_newick/r3000003_breaks.csv
✅ r3000004.trees → trees_newick/r3000004.tree, trees_newick/r3000004_breaks.csv
✅ r3000005.trees → trees_newick/r3000005.tree, trees_newick/r3000005_breaks.csv
✅ r3000006.trees → trees_newick/r3000006.tree, trees_newick/r3000006_breaks.csv
✅ r3000007.trees → trees_newick/r3000007.tree, trees_newick/r3000007_breaks.csv
✅ r3000008.trees → trees_newick/r3000008.tree, trees_newick/r3000008_breaks.csv
✅ r3000009.trees → trees_newick/r3000009.tree, trees_newick/r3000009_breaks.csv
✅ r3000010.trees → trees_newick/r3000010.tree, trees_newick/r3000010_breaks.csv
✅ r3000011.trees → trees_newick/r3000011.tree, trees_newick/r3000011_breaks.csv
✅ r3000012.trees → trees_newick/r3000012