In [1]:
from pathlib import Path
from typing import Optional, List, Tuple
import traceback

from mpase import ( 
        mpase,
        CfgCommon, CfgHDR, CfgPF,
        export_all,
    )


In [2]:
###### data extraction for web visualization #########
# Input root: main() treats every sub-directory of BASE_DIR as one chromosome.
BASE_DIR = Path("data/green_monkey/all_structure_files")
# Output root: main() exports each chromosome into OUT_ROOT/<chromosome name>.
OUT_ROOT = Path("data/green_monkey/shape_data")

# Folder names in the order they should be processed. Time folders that are
# not listed are sorted after the listed ones; condition folders that are not
# listed are ignored by collect_csvs_and_labels().
TIMES_ORDER = ["12hrs", "18hrs", "24hrs"]
CONDS_ORDER = ["untr", "vacv"]
# Set form of CONDS_ORDER, used for membership tests.
VALIDCONDS = set(CONDS_ORDER)

def collect_csvs_and_labels(chrom_dir: Path) -> Tuple[List[str], List[str]]:
    """
    Collect every ``structure_*_gene_info.csv`` below one chromosome folder.

    Files are expected at
    ``.../<time>/<cond>/structure_<time>_<cond>_gene_info.csv``. Time and
    condition are read from the two enclosing folder names, not from the file
    name. A file whose lower-cased condition folder is not in ``VALIDCONDS``
    is ignored. A time folder that is missing from ``TIMES_ORDER`` is kept and
    sorted after all known time points.

    Args:
        chrom_dir: Chromosome folder to search recursively; its name becomes
            the label prefix.

    Returns:
        ``(csv_list, labels)``: two parallel lists ordered by time, then
        condition, then path string. ``csv_list`` holds the paths as strings
        and each label is ``<chrom>_<time>_<cond>`` with the condition
        lower-cased.
    """
    chr_name = chrom_dir.name
    # (time_idx, cond_idx, path_str, label). The label is built here, once, so
    # the folder names do not have to be derived again after sorting.
    entries: List[Tuple[int, int, str, str]] = []

    for p in chrom_dir.rglob("structure_*_gene_info.csv"):
        # pathlib never returns None for .parent, so no None guard is needed.
        cond = p.parent.name.lower()
        if cond not in VALIDCONDS:
            continue

        time = p.parent.parent.name
        # Unknown time folders are still allowed; they share one index past
        # the end of TIMES_ORDER and are ordered among themselves by path.
        time_idx = TIMES_ORDER.index(time) if time in TIMES_ORDER else len(TIMES_ORDER)
        entries.append(
            (time_idx, CONDS_ORDER.index(cond), str(p), f"{chr_name}_{time}_{cond}")
        )

    # Sort by (time, cond, path) for a stable, reproducible ordering.
    entries.sort(key=lambda e: e[:3])

    csv_list = [e[2] for e in entries]
    labels = [e[3] for e in entries]
    return csv_list, labels

def main():
    """
    Run mpase on every chromosome folder under ``BASE_DIR`` and export the result.

    Each sub-directory of ``BASE_DIR`` is processed in name order. Its
    gene-info CSVs are collected with ``collect_csvs_and_labels``; a chromosome
    with fewer than two CSVs is skipped. Otherwise ``mpase`` is run and its
    result is handed to ``export_all`` with ``OUT_ROOT/<chromosome>`` as the
    output folder. An exception raised for one chromosome is printed with its
    traceback and counted as a failure, and the loop continues. A
    completed/skipped/failed summary is printed at the end.

    Returns:
        None.
    """
    # A missing input folder would make iterdir() raise FileNotFoundError;
    # report it the same way as the "no chromosome folders" case instead.
    if not BASE_DIR.is_dir():
        print(f"[WARN] Input folder not found: {BASE_DIR}")
        return

    OUT_ROOT.mkdir(parents=True, exist_ok=True)

    chrom_dirs = [d for d in BASE_DIR.iterdir() if d.is_dir()]
    if not chrom_dirs:
        print(f"[WARN] No chromosome folders in {BASE_DIR}")
        return

    done, skipped, failed = 0, 0, 0

    for chrom_dir in sorted(chrom_dirs, key=lambda p: p.name):
        chr_name = chrom_dir.name
        print(f"\n=== {chr_name} ===")

        csv_list, labels = collect_csvs_and_labels(chrom_dir)
        if len(csv_list) < 2:
            print(f"[SKIP] Found {len(csv_list)} file(s) under {chr_name}; need ≥2 to run.")
            skipped += 1
            continue

        out_dir = OUT_ROOT / chr_name
        out_dir.mkdir(parents=True, exist_ok=True)

        try:
            # Build configs (no out_dir in CfgCommon). They are created inside
            # the try so a constructor error is counted against this chromosome.
            cfg_common = CfgCommon(grid_base=160, pad_frac=0.05)
            cfg_hdr = CfgHDR()
            cfg_pf = CfgPF()

            print(f"[RUN] {chr_name}: {len(csv_list)} CSVs")
            result = mpase(
                csv_list=csv_list,
                labels=labels,
                cfg_common=cfg_common,
                cfg_hdr=cfg_hdr,
                cfg_pf=cfg_pf,
            )

            # Single, aggregated export per chromosome
            export_all(
                result,
                out_dir=str(out_dir),
                progress_report=True
            )
            print(f"[OK] Saved → {out_dir}")
            done += 1

        except Exception as e:
            # Best effort: one failing chromosome must not stop the batch.
            failed += 1
            print(f"[FAIL] {chr_name}: {e}")
            traceback.print_exc()

    print("\n=== Summary ===")
    print(f"Completed: {done}")
    print(f"Skipped:   {skipped}")
    print(f"Failures:  {failed}")

# Run the export when this cell is executed (or the code is run as a script);
# importing the code as a module does not trigger it.
if __name__ == "__main__":
    main()



=== chr1 ===
[RUN] chr1: 6 CSVs
[export] begin: out_dir=data/green_monkey/shape_data/chr1
[export] write: kind=meta, path=data/green_monkey/shape_data/chr1/meta_data.json
[export] write: kind=background_mask, path=data/green_monkey/shape_data/chr1/background_mask.json
[export] write: kind=density, label=chr1_12hrs_untr, path=data/green_monkey/shape_data/chr1/density/chr1_12hrs_untr_density.json
[export] write: kind=density, label=chr1_12hrs_vacv, path=data/green_monkey/shape_data/chr1/density/chr1_12hrs_vacv_density.json
[export] write: kind=density, label=chr1_18hrs_untr, path=data/green_monkey/shape_data/chr1/density/chr1_18hrs_untr_density.json
[export] write: kind=density, label=chr1_18hrs_vacv, path=data/green_monkey/shape_data/chr1/density/chr1_18hrs_vacv_density.json
[export] write: kind=density, label=chr1_24hrs_untr, path=data/green_monkey/shape_data/chr1/density/chr1_24hrs_untr_density.json
[export] write: kind=density, label=chr1_24hrs_vacv, path=data/green_monkey/shape_dat

In [2]:
####### structure_*_gene_info.csv → data/green_monkey/structure_genes_aligned #######
from pathlib import Path
from typing import List, Tuple
import os, json, traceback
import numpy as np

from mpase import mpase, CfgCommon

# NOTE: these assignments rebind module-level names that an earlier cell also
# sets, so any function that reads OUT_ROOT sees this value once the cell ran.
# Input root: main() treats every sub-directory of BASE_DIR as one chromosome.
BASE_DIR = Path("data/green_monkey/all_structure_files")
# Output root: main() passes OUT_ROOT/<chromosome name> to mpase as out_dir.
OUT_ROOT = Path("data/green_monkey/structure_genes_aligned")

# Not referenced by the code visible in this cell: _collect_geneinfo() orders
# files by path string instead.
TIMES_ORDER = ["12hrs", "18hrs", "24hrs"]
CONDS_ORDER = ["untr", "vacv"]

def _derive_label(chr_name: str, p: Path) -> str:
    time = p.parent.parent.name if p.parent and p.parent.parent else "UNK"
    cond = p.parent.name.lower() if p.parent else "UNK"
    return f"{chr_name}_{time}_{cond.lower()}"

def _collect_geneinfo(chrom_dir: Path) -> Tuple[List[str], List[str]]:
    """
    Find all ``structure_*_gene_info.csv`` files below a chromosome folder.

    Args:
        chrom_dir: Chromosome folder to search recursively; its name is the
            prefix passed to ``_derive_label``.

    Returns:
        ``(csv_list, labels)``: the file paths as strings, ordered by path
        string, and the label ``_derive_label`` produces for each of them.
    """
    ordered = list(chrom_dir.rglob("structure_*_gene_info.csv"))
    ordered.sort(key=str)

    csv_list = [str(found) for found in ordered]
    labels = [_derive_label(chrom_dir.name, found) for found in ordered]
    return csv_list, labels

def main():
    """
    Run point-only mpase alignment on the gene-info CSVs of every chromosome.

    Each sub-directory of ``BASE_DIR`` is processed in name order. Its files
    are collected with ``_collect_geneinfo``; a chromosome with fewer than two
    files is skipped. Otherwise ``mpase`` is called with
    ``point_alignment_only=True``, ``align_mode="auto"`` and
    ``OUT_ROOT/<chromosome>`` as ``out_dir``. This function writes no files
    itself; presumably mpase saves the aligned files into ``out_dir``
    (TODO confirm against the mpase documentation). An exception raised for
    one chromosome is printed with its traceback and counted as a failure, and
    the loop continues. A one-line summary is printed at the end.

    Returns:
        None.
    """
    # A missing input folder would make iterdir() raise FileNotFoundError;
    # report it the same way as the "no chromosome folders" case instead.
    if not BASE_DIR.is_dir():
        print(f"[WARN] Input folder not found: {BASE_DIR}")
        return

    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    chrom_dirs = [d for d in BASE_DIR.iterdir() if d.is_dir()]
    if not chrom_dirs:
        print(f"[WARN] No chromosome folders in {BASE_DIR}")
        return

    done = skipped = failed = 0
    for chrom_dir in sorted(chrom_dirs, key=lambda p: p.name):
        chr_name = chrom_dir.name
        print(f"\n=== {chr_name} (gene_info) ===")
        csv_list, labels = _collect_geneinfo(chrom_dir)
        if len(csv_list) < 2:
            print(f"[SKIP] {chr_name}: only {len(csv_list)} file(s)")
            skipped += 1
            continue

        out_dir = OUT_ROOT / chr_name
        out_dir.mkdir(parents=True, exist_ok=True)

        try:
            cfg_common = CfgCommon(grid_base=160, pad_frac=0.05)
            # The return value is not used here, so it is not bound to a name.
            mpase(
                csv_list=csv_list,
                labels=labels,
                cfg_common=cfg_common,
                point_alignment_only=True,
                align_mode="auto",
                # Reuse the folder created above instead of rebuilding the path.
                out_dir=str(out_dir),
            )

            print(f"[OK] Saved {len(labels)} files → {out_dir}")
            done += 1

        except Exception as e:
            # Best effort: one failing chromosome must not stop the batch.
            failed += 1
            print(f"[FAIL] {chr_name}: {e}")
            traceback.print_exc()

    print("\n=== Summary (gene_info aligned) ===")
    print(f"Completed: {done}  Skipped: {skipped}  Failed: {failed}")

# Run the gene-info alignment when this cell is executed (or the code is run
# as a script); importing the code as a module does not trigger it.
if __name__ == "__main__":
    main()



=== chr1 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr1

=== chr10 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr10

=== chr12 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr12

=== chr13 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr13

=== chr14 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr14

=== chr15 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr15

=== chr16 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr16

=== chr18 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr18

=== chr19 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr19

=== chr2 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_genes_aligned/chr2

=== chr20 (gene_info) ===
[OK] Saved 6 file

In [9]:
####### beads data aligned extraction #######
from pathlib import Path
from typing import List, Tuple
import os, json, traceback
import numpy as np

from mpase import mpase, CfgCommon

# NOTE: these assignments rebind module-level names that earlier cells also
# set, so any function that reads OUT_ROOT sees this value once the cell ran.
# Input root: main() treats every sub-directory of BASE_DIR as one chromosome.
BASE_DIR = Path("data/green_monkey/all_structure_files")
# Output root: main() passes OUT_ROOT/<chromosome name> to mpase as out_dir.
OUT_ROOT = Path("data/green_monkey/structure_beads_aligned")

# Not referenced by the code visible in this cell: _collect_geneinfo() orders
# files by path string instead.
TIMES_ORDER = ["12hrs", "18hrs", "24hrs"]
CONDS_ORDER = ["untr", "vacv"]

def _derive_label(chr_name: str, p: Path) -> str:
    time = p.parent.parent.name if p.parent and p.parent.parent else "UNK"
    cond = p.parent.name.lower() if p.parent else "UNK"
    return f"{chr_name}_{time}_{cond.lower()}"

def _collect_geneinfo(chrom_dir: Path) -> Tuple[List[str], List[str]]:
    """
    Find all ``structure_*_id0.csv`` files below a chromosome folder.

    Despite the function name, this version matches the ``*_id0.csv`` files,
    not the ``*_gene_info.csv`` files (presumably the bead-coordinate files
    this cell is about; confirm against the data layout).

    Args:
        chrom_dir: Chromosome folder to search recursively; its name is the
            prefix passed to ``_derive_label``.

    Returns:
        ``(csv_list, labels)``: the file paths as strings, ordered by path
        string, and the label ``_derive_label`` produces for each of them.
    """
    prefix = chrom_dir.name
    csv_list: List[str] = []
    labels: List[str] = []
    matches = list(chrom_dir.rglob("structure_*_id0.csv"))
    matches.sort(key=str)
    for match in matches:
        labels.append(_derive_label(prefix, match))
        csv_list.append(str(match))
    return csv_list, labels

def main():
    """
    Run point-only mpase alignment on the bead CSVs of every chromosome.

    Each sub-directory of ``BASE_DIR`` is processed in name order. Its
    ``structure_*_id0.csv`` files are collected with ``_collect_geneinfo``; a
    chromosome with fewer than two files is skipped. Otherwise ``mpase`` is
    called with ``xyz_cols=['x', 'y', 'z']``, ``point_alignment_only=True``,
    ``align_mode="auto"`` and ``OUT_ROOT/<chromosome>`` as ``out_dir``. This
    function writes no files itself; presumably mpase saves the aligned files
    into ``out_dir`` (TODO confirm against the mpase documentation). An
    exception raised for one chromosome is printed with its traceback and
    counted as a failure, and the loop continues. A one-line summary is
    printed at the end.

    Returns:
        None.
    """
    # A missing input folder would make iterdir() raise FileNotFoundError;
    # report it the same way as the "no chromosome folders" case instead.
    if not BASE_DIR.is_dir():
        print(f"[WARN] Input folder not found: {BASE_DIR}")
        return

    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    chrom_dirs = [d for d in BASE_DIR.iterdir() if d.is_dir()]
    if not chrom_dirs:
        print(f"[WARN] No chromosome folders in {BASE_DIR}")
        return

    done = skipped = failed = 0
    for chrom_dir in sorted(chrom_dirs, key=lambda p: p.name):
        chr_name = chrom_dir.name
        # Log tag fixed: this cell processes bead files, not gene_info files.
        print(f"\n=== {chr_name} (beads) ===")
        csv_list, labels = _collect_geneinfo(chrom_dir)
        if len(csv_list) < 2:
            print(f"[SKIP] {chr_name}: only {len(csv_list)} file(s)")
            skipped += 1
            continue

        out_dir = OUT_ROOT / chr_name
        out_dir.mkdir(parents=True, exist_ok=True)

        try:
            cfg_common = CfgCommon(grid_base=160, pad_frac=0.05)
            # The return value is not used here, so it is not bound to a name.
            mpase(
                csv_list=csv_list,
                labels=labels,
                xyz_cols=['x', 'y', 'z'],
                cfg_common=cfg_common,
                point_alignment_only=True,
                align_mode="auto",
                # Reuse the folder created above instead of rebuilding the path.
                out_dir=str(out_dir),
            )

            print(f"[OK] Saved {len(labels)} files → {out_dir}")
            done += 1

        except Exception as e:
            # Best effort: one failing chromosome must not stop the batch.
            failed += 1
            print(f"[FAIL] {chr_name}: {e}")
            traceback.print_exc()

    print("\n=== Summary (beads aligned) ===")
    print(f"Completed: {done}  Skipped: {skipped}  Failed: {failed}")

# Run the bead alignment when this cell is executed (or the code is run as a
# script); importing the code as a module does not trigger it.
if __name__ == "__main__":
    main()



=== chr1 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr1

=== chr10 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr10

=== chr12 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr12

=== chr13 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr13

=== chr14 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr14

=== chr15 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr15

=== chr16 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr16

=== chr18 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr18

=== chr19 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr19

=== chr2 (gene_info) ===
[OK] Saved 6 files → data/green_monkey/structure_beads_aligned/chr2

=== chr20 (gene_info) ===
[OK] Saved 6 file