In [1]:
import pathlib
from concurrent.futures import ProcessPoolExecutor, as_completed

import pandas as pd

from bolero_process.atac.sc.fragments_to_zarr import *

In [2]:
# Run configuration read by the cells below.
# Dataset label: selects the input directory under /tempdata/ and names the
# "<dataset>.CutSites" output directory under /wmb/zarr/.
dataset = "Janssens2022Nature"
# Genome label; in the visible cells it is only used to build chrom_sizes_path.
genome = "dm6"
# Worker count handed to ProcessPoolExecutor; also the number of samples
# submitted per batch in the conversion loop.
cpu = 13
# Chromosome sizes file passed to fragments_to_cutsite_zarr for every sample.
chrom_sizes_path = f"/ref/{genome}/fasta/{genome}.main.chrom.sizes"

In [3]:
# Index every fragments file of the dataset by a sample label. The label is
# the first two "__"-separated fields of the containing directory's name,
# re-joined with "-". If two directories produce the same label, the one
# yielded later by the glob replaces the earlier one.
frag_paths = pd.Series(
    dict(
        ("-".join(fragments_file.parent.name.split("__")[:2]), fragments_file)
        for fragments_file in pathlib.Path(f"/tempdata/{dataset}/").glob(
            "*/fragments.tsv.gz"
        )
    )
)
# Displayed as the cell result: how many labelled fragments files were found.
frag_paths.size

25

In [4]:
# Run fragments_to_cutsite_zarr once per sample in `frag_paths`, in batches of
# `cpu` samples. Each sample is written to its own
# /wmb/zarr/<dataset>.CutSites/<sample>.zarr path.
#
# A new process pool is opened for every batch, and leaving the `with` block
# waits for all jobs of that batch, so the next batch only starts after the
# slowest job of the current one has finished.
# NOTE(review): presumably the per-batch pool is meant to give every batch
# fresh worker processes — confirm; a single pool would already cap the
# concurrency at `cpu`.
for chunk_start in range(0, frag_paths.size, cpu):
    chunk_paths = frag_paths.iloc[chunk_start : chunk_start + cpu]
    with ProcessPoolExecutor(cpu) as exe:
        # Map every future back to its (sample, path) so that a failure can be
        # attributed to the input that caused it.
        job_by_future = {}
        for sample, path in chunk_paths.items():
            future = exe.submit(
                fragments_to_cutsite_zarr,
                fragments_path=path,
                chrom_sizes_path=chrom_sizes_path,
                output_zarr_path=f"/wmb/zarr/{dataset}.CutSites/{sample}.zarr",
                barcode_prefix=sample,
                format="bed",
                adjust_tn5=False,
                chunk_size=20_000_000,
                sort_fragments=True,
                remove_chr=True,
            )
            job_by_future[future] = (sample, path)
        for future in as_completed(job_by_future):
            try:
                # result() re-raises whatever the worker raised.
                future.result()
            except Exception as err:
                failed_sample, failed_path = job_by_future[future]
                # Chain the worker's exception so its traceback is kept while
                # the message names the sample that failed. As before, the
                # first failure aborts the run once the current batch drains.
                raise RuntimeError(
                    f"fragments_to_cutsite_zarr failed for sample "
                    f"{failed_sample!r} ({failed_path})"
                ) from err

Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-2b777e.zarr
Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-4e2bbb.zarrZarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-46d13d.zarrZarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-5e8b4e.zarr
Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-303662.zarrZarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-8f8273.zarr

Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-6d6d0c.zarr
Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-7bc63d.zarr
Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-8569df.zarr


Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-127e2f.zarrZarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-2c5b14.zarr

Zarr store already exists at /wmb/zarr/Janssens2022Nature.CutSites/DFB-59b45b.zarr
Zarr