# Create metadata to prepare at pseudobulk level

In [1]:
import os
import pandas as pd
from pathlib import Path
import schicluster

In [2]:
PKG_DIR = schicluster.__path__[0]

In [3]:
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/")
parent_root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/")
raw_allc_subdir = "fastq_demultiplex"

In [4]:
def prep_dir(output_dir, chunk_df, template, params):
    os.makedirs(output_dir, exist_ok=True)
    cell_table_path = os.path.join(output_dir, "cell_table.csv")
    chunk_df.to_csv(cell_table_path, header=False, index=True)
    params_str = "\n".join(f"{k} = {v}" for k, v in params.items())

    with open(os.path.join(output_dir, "Snakefile_master"), "w") as f:
        f.write(params_str + template)
    return

In [5]:
# SLURM batch-script header, filled in with str.format(). Positional fields:
#   {0} directory relative to the Combined/hic root; used in the job name and
#       to locate <{0}>/bulk_domain_scripts for the log files
#   {1} step number, {2} batch index (job name and log file names)
#   {3} node count (-N), {4} CPUs (-c), {5} memory in GB (--mem)
#   {6} wall-time limit in hours (-t)
#   {7} directory the job changes into before running its commands
# NOTE(review): the log paths repeat the absolute prefix stored in `root`.
sbatch_header = (
    "#! /bin/bash\n"
    "#SBATCH -p condo\n"
    "#SBATCH -q condo\n"
    "#SBATCH -A csd772\n"
    "#SBATCH -J hh_{0}_s{1}_{2}\n"
    "#SBATCH -N {3}\n"
    "#SBATCH -c {4}\n"
    "#SBATCH --mem {5}G\n"
    "#SBATCH -t {6}:00:00\n"
    "#SBATCH -o /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/bulk_domain_scripts/step{1}_{2}.out\n"
    "#SBATCH -e /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/bulk_domain_scripts/step{1}_{2}.err\n"
    "#SBATCH --mail-user biy022@health.ucsd.edu\n"
    "#SBATCH --mail-type FAIL\n"
    "\n"
    "source ~/.bashrc\n"
    "conda activate schicluster\n"
    "cd {7}\n"
)

In [6]:
scmethyl_root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic")
chrom_size_path = scmethyl_root / "genome" / "genome_hg38" / "hg38.autosomal.chrom.sizes"

## Oligo as a test case

In [7]:
domain_cluster_age_dir = root / "domain_cluster_age"
oligo_dir = domain_cluster_age_dir / "Oligo"

### Oligo Age 2040 downsampled

In [8]:
oligo_2040_dir = oligo_dir / "Age2040_DOWN"
cool_list = list(oligo_2040_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [9]:
cell_table = pd.DataFrame(
    cool_list,
    index=[xx.stem for xx in cool_list],
    columns=["cool_path"]
)
cell_table.to_csv(oligo_2040_dir / "cell_table_bulk.csv", header=False, index=True)

In [10]:
params = {
    "resolution": 25000,
    "chrom_size_path": '"{}"'.format(chrom_size_path),
}

In [11]:
chunksize = 200
resolution = 25000

In [12]:
# Per-chunk Snakefile template shipped inside the schicluster package.
with open("{}/cool/Snakefile_chunk_template".format(PKG_DIR)) as f:
    GENERATE_MATRIX_CHUNK_TEMPLATE = f.read()
(oligo_2040_dir / "Age2040").mkdir(exist_ok=True, parents=True)

# Split the cool files into consecutive groups of at most `chunksize` rows and
# prepare one working directory per group. Slicing by position (instead of
# adding a helper "chunk" column) keeps every cell_table.csv at the same
# name,path layout whether there is one chunk or many.
# max(..., 1) still produces chunk0 when the table is empty.
total_chunk_dirs = []
for chunk, start in enumerate(range(0, max(cell_table.shape[0], 1), chunksize)):
    curr_dir = oligo_2040_dir / ("Age2040" + f"_chunk{chunk}")
    params["cell_table_path"] = '"{}"'.format(curr_dir / "cell_table.csv")
    chunk_df = cell_table.iloc[start:start + chunksize]
    prep_dir(str(curr_dir), chunk_df, GENERATE_MATRIX_CHUNK_TEMPLATE, params)
    total_chunk_dirs.append(curr_dir)

In [13]:
# One snakemake invocation per chunk directory, collected in a single text file.
script_dir = oligo_2040_dir / "bulk_domain_scripts"
script_dir.mkdir(exist_ok=True)
step1_lines = [
    "snakemake -d {0} --snakefile {0}/Snakefile_master -j 5 --rerun-incomplete".format(chunk_dir) + "\n"
    for chunk_dir in total_chunk_dirs
]
with (script_dir / "snakemake_cmd_step1.txt").open("w") as f:
    f.writelines(step1_lines)

In [14]:
# The group-level Snakefile is parameterised by the output directory rather
# than by a per-chunk cell table, so swap that entry before rendering.
params.pop("cell_table_path", None)
params["output_dir"] = '"{}"'.format(oligo_2040_dir)
params_str = "\n".join("{} = {}".format(k, v) for k, v in params.items())

group_script_dir = oligo_2040_dir / "bulk_domain_scripts"
group_snakefile = group_script_dir / "Snakefile"

with open("{}/cool/Snakefile_group_template".format(PKG_DIR), "r") as template_handle:
    GENERATE_MATRIX_GROUP_TEMPLATE = template_handle.read()

# Parameters first, then the packaged template body.
with open(group_snakefile, "w") as snakefile_handle:
    snakefile_handle.write(params_str + "\n" + GENERATE_MATRIX_GROUP_TEMPLATE)

# Step 2 is a single snakemake run over the whole output directory.
cmd = "snakemake -d {} --snakefile {} -j 10 --rerun-incomplete".format(
    oligo_2040_dir,
    group_snakefile
)
with open(group_script_dir / "snakemake_cmd_step2.txt", "w") as cmd_handle:
    cmd_handle.write(cmd + "\n")

In [15]:
# Number of step-1 snakemake commands bundled into one SLURM job. This uses
# its own name so the `chunksize` that controls how many cool files go into a
# chunk directory is not overwritten when cells are re-run out of order.
cmds_per_job = 40
step1_script_dir = oligo_2040_dir / "bulk_domain_scripts"
with open(step1_script_dir / "snakemake_cmd_step1.txt", "r") as finput:
    cmds = [line.strip() for line in finput]

# Batch i gets snakemake_cmd_step1_<i>.txt plus a step1_<i>.sbatch that runs it.
for index, start in enumerate(range(0, len(cmds), cmds_per_job)):
    with open(step1_script_dir / "snakemake_cmd_step1_{}.txt".format(index), "w") as f:
        f.write("\n".join(cmds[start:start + cmds_per_job]) + "\n")
    with open(step1_script_dir / "step1_{}.sbatch".format(index), "w") as f:
        # Header arguments: relative dir, step 1, batch index, 1 node,
        # 16 CPUs, 100 GB, 4 hours, working directory.
        f.write(sbatch_header.format("domain_cluster_age/Oligo/Age2040_DOWN", 1, index, 1, 16, 100, 4, step1_script_dir) + "\n")
        f.write("bash snakemake_cmd_step1_{}.txt\n".format(index))

## Custom helper functions

In [8]:
def create_bulk_contact_file(cell_table_file_name, output_file):
    """Stack the contact tables of all cells in a cell table into one file.

    The cell table is a header-less CSV whose first column holds cell
    barcodes of the form ``<donor>-<rest>``. For each barcode the file
    ``<parent_root>/<donor>_deep/hic/rmbkl/<barcode>.contact.rmbkl.tsv.gz``
    is read as a header-less tab-separated table. All tables are
    concatenated row-wise in cell-table order and written to ``output_file``
    as tab-separated text without header or index; compression is inferred
    from the output file name.

    Relies on the module-level ``parent_root`` path.
    """
    cells = pd.read_csv(cell_table_file_name, sep=",", header=None, index_col=0)

    def _load_contacts(barcode):
        # Only the donor prefix is needed to locate the per-donor directory.
        donor_name, _ = barcode.split("-", 1)
        rmbkl_dir = parent_root / f"{donor_name}_deep" / "hic" / "rmbkl"
        return pd.read_csv(
            rmbkl_dir / f"{barcode}.contact.rmbkl.tsv.gz",
            sep="\t",
            header=None,
            index_col=None,
        )

    merged = pd.concat([_load_contacts(barcode) for barcode in cells.index], axis=0)
    merged.to_csv(output_file, header=False, index=False, sep="\t", compression="infer")

In [9]:
# Shell command template for `hicluster prepare-impute`, filled in with
# str.format(). Positional fields:
#   {0} path passed to --cell_table
#   {1} path passed to --output_dir
#   {2} path passed to --chrom_size_path
# All other options (batch size, padding, distances, 25 kb resolution) are
# fixed in the template. The rendered command is printed and run by hand.
prep_impute_str_template = (
    "hicluster prepare-impute \\\n"
    "\t--cell_table {0} \\\n"
    "\t--batch_size 1536 --pad 1 --cpu_per_job 8 \\\n"
    "\t--output_dir {1} \\\n"
    "\t--chrom_size_path {2} \\\n"
    "\t--output_dist 10050000 \\\n"
    "\t--window_size 30000000 \\\n"
    "\t--step_size 10000000 \\\n"
    "\t--resolution 25000\n"
)

In [10]:
# SLURM script template for the imputation step, filled in with str.format().
# Positional fields:
#   {0} job name (-J)
#   {1} stdout log path (-o), {2} stderr log path (-e)
#   {3} directory the job changes into
#   {4} snakemake working directory (-d), {5} Snakefile path (--snakefile)
# Resources are fixed: 3 nodes, 16 CPUs, 450 GB, 12 hours.
# NOTE(review): the job requests 3 nodes (-N 3) but runs a single
# `snakemake -j 16` command; confirm the extra nodes are actually used.
impute_sbatch_template = (
    "#! /bin/bash\n"
    "#SBATCH -p condo\n"
    "#SBATCH -q condo\n"
    "#SBATCH -A csd772\n"
    "#SBATCH -J {0}\n"
    "#SBATCH -N 3\n"
    "#SBATCH -c 16\n"
    "#SBATCH --mem 450G\n"
    "#SBATCH -t 12:00:00\n"
    "#SBATCH -o {1}\n"
    "#SBATCH -e {2}\n"
    "#SBATCH --mail-user biy022@health.ucsd.edu\n"
    "#SBATCH --mail-type FAIL\n\n"

    "source ~/.bashrc\n"
    "conda activate schicluster\n\n"

    "cd {3}\n"
    "snakemake -d {4} --snakefile {5} -j 16\n"
)

In [11]:
# SLURM script template that runs a file of shell commands with bash, filled
# in with str.format(). Positional fields:
#   {0} job name (-J)
#   {1} stdout log path (-o), {2} stderr log path (-e)
#   {3} directory the job changes into
#   {4} path of the command file executed with `bash`
# Resources are fixed: 1 node, 16 CPUs, 100 GB, 4 hours.
aggregate_sbatch_template = (
    "#! /bin/bash\n"
    "#SBATCH -p condo\n"
    "#SBATCH -q condo\n"
    "#SBATCH -A csd772\n"
    "#SBATCH -J {0}\n"
    "#SBATCH -N 1\n"
    "#SBATCH -c 16\n"
    "#SBATCH --mem 100G\n"
    "#SBATCH -t 4:00:00\n"
    "#SBATCH -o {1}\n"
    "#SBATCH -e {2}\n"
    "#SBATCH --mail-user biy022@health.ucsd.edu\n"
    "#SBATCH --mail-type FAIL\n"
    "\n"
    "source ~/.bashrc\n"
    "conda activate schicluster\n"
    "cd {3}\n"
    "bash {4}\n"
)

In [12]:
def generate_Q_cool(output_dir, cool_list, sub_dir, job_name):
    """Write the Snakemake and SLURM scripts that process one group's cool files.

    Files created under ``output_dir``:

    * ``cell_table_bulk.csv``: one ``name,path`` row per entry of
      ``cool_list`` (name is the file stem), without a header.
    * ``<sub_dir>_chunk<i>/``: ``cell_table.csv`` and ``Snakefile_master``
      written by ``prep_dir``, each covering at most 200 cool files.
    * ``bulk_domain_scripts/snakemake_cmd_step1.txt``: one snakemake command
      per chunk directory.
    * ``bulk_domain_scripts/snakemake_cmd_step1_<j>.txt`` and
      ``step1_<j>.sbatch``: the step-1 commands in batches of 40, each batch
      with its own SLURM script and its own ``step1_<j>.out/.err`` logs.
    * ``bulk_domain_scripts/Snakefile`` and ``snakemake_cmd_step2.txt``: the
      group-level step.

    Parameters
    ----------
    output_dir : pathlib.Path
        Existing directory that receives all of the files above.
    cool_list : list of pathlib.Path
        Cool files to process.
    sub_dir : str
        Prefix of the chunk directory names; ``output_dir / sub_dir`` is
        also created.
    job_name : str
        SLURM job name used for every step-1 batch script.

    Relies on the module-level ``chrom_size_path``, ``PKG_DIR``,
    ``prep_dir`` and ``aggregate_sbatch_template``.
    """
    cells_per_chunk = 200
    cmds_per_sbatch = 40
    script_dir = output_dir / "bulk_domain_scripts"

    cell_table = pd.DataFrame(
        cool_list,
        index=[xx.stem for xx in cool_list],
        columns=["cool_path"]
    )
    cell_table.to_csv(output_dir / "cell_table_bulk.csv", header=False, index=True)

    params = {
        "resolution": 25000,
        "chrom_size_path": '"{}"'.format(chrom_size_path),
    }

    with open("{}/cool/Snakefile_chunk_template".format(PKG_DIR)) as f:
        chunk_template = f.read()
    (output_dir / sub_dir).mkdir(exist_ok=True, parents=True)

    # Slice by position so every chunk's cell_table.csv has the same
    # name,path layout (no helper "chunk" column leaking into the CSV).
    # max(..., 1) still produces chunk0 when the table is empty.
    total_chunk_dirs = []
    n_cells = cell_table.shape[0]
    for chunk, start in enumerate(range(0, max(n_cells, 1), cells_per_chunk)):
        curr_dir = output_dir / f"{sub_dir}_chunk{chunk}"
        chunk_params = dict(
            params,
            cell_table_path='"{}"'.format(curr_dir / "cell_table.csv"),
        )
        prep_dir(
            str(curr_dir),
            cell_table.iloc[start:start + cells_per_chunk],
            chunk_template,
            chunk_params,
        )
        total_chunk_dirs.append(curr_dir)

    # Step 1: one snakemake run per chunk directory.
    script_dir.mkdir(exist_ok=True)
    step1_cmds = [
        "snakemake -d {0} --snakefile {0}/Snakefile_master -j 5 --rerun-incomplete".format(chunk_dir)
        for chunk_dir in total_chunk_dirs
    ]
    with (script_dir / "snakemake_cmd_step1.txt").open("w") as f:
        f.writelines(cmd + "\n" for cmd in step1_cmds)

    # Step 2: the group-level Snakefile takes the output directory instead of
    # a per-chunk cell table.
    group_params = dict(params, output_dir='"{}"'.format(output_dir))
    params_str = "\n".join("{} = {}".format(k, v) for k, v in group_params.items())

    with open("{}/cool/Snakefile_group_template".format(PKG_DIR), "r") as f:
        group_template = f.read()

    with open(script_dir / "Snakefile", "w") as f:
        f.write(params_str + "\n" + group_template)

    with open(script_dir / "snakemake_cmd_step2.txt", "w") as f:
        cmd = "snakemake -d {} --snakefile {} -j 10 --rerun-incomplete".format(
            output_dir,
            script_dir / "Snakefile"
        )
        f.write(cmd + "\n")

    # Bundle the step-1 commands into SLURM jobs. Each batch logs to its own
    # step1_<index>.out/.err so that jobs do not overwrite each other's logs.
    for index, start in enumerate(range(0, len(step1_cmds), cmds_per_sbatch)):
        batch_file = script_dir / "snakemake_cmd_step1_{}.txt".format(index)
        with open(batch_file, "w") as f:
            f.write("\n".join(step1_cmds[start:start + cmds_per_sbatch]) + "\n")
        with open(script_dir / "step1_{}.sbatch".format(index), "w") as f:
            f.write(aggregate_sbatch_template.format(
                job_name,
                script_dir / f"step1_{index}.out",
                script_dir / f"step1_{index}.err",
                script_dir,
                batch_file
            ))

## Test the functions with Oligo Age2040

In [13]:
cell_table_file_name = oligo_dir / "Age2040" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age2040" / "bulk_contacts" / "Age2040.tsv.gz"
if not (oligo_dir / "Age2040" / "bulk_contacts").exists():
    (oligo_dir / "Age2040" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [13]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [14]:
(oligo_dir / "Age2040" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age2040" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age2040" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age2040\t{}\n".format(output_cell_table_file))

In [15]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age2040" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age2040" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [16]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age2040/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age2040/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [17]:
with open(oligo_dir / "Age2040" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.2040",
        oligo_dir / "Age2040" / "bulk_impute" / "scripts" / "Oligo.2040.out",
        oligo_dir / "Age2040" / "bulk_impute" / "scripts" / "Oligo.2040.err",
        oligo_dir / "Age2040" / "bulk_impute",
        oligo_dir / "Age2040" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age2040" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [18]:
output_dir = oligo_dir / "Age2040"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [19]:
generate_Q_cool(output_dir, cool_list, "Age2040", "Oligo.2040")

### Oligo Age 4060

In [20]:
cell_table_file_name = oligo_dir / "Age4060" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age4060" / "bulk_contacts" / "Age4060.tsv.gz"
if not (oligo_dir / "Age4060" / "bulk_contacts").exists():
    (oligo_dir / "Age4060" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [21]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [22]:
(oligo_dir / "Age4060" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age4060" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age4060" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age4060\t{}\n".format(output_cell_table_file))

In [23]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age4060" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age4060" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [24]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age4060/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age4060/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [25]:
with open(oligo_dir / "Age4060" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    # Label the job and its log files with this age group so they are not
    # confused with the Age2040 run in the queue or on disk.
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.4060",
        oligo_dir / "Age4060" / "bulk_impute" / "scripts" / "Oligo.4060.out",
        oligo_dir / "Age4060" / "bulk_impute" / "scripts" / "Oligo.4060.err",
        oligo_dir / "Age4060" / "bulk_impute",
        oligo_dir / "Age4060" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age4060" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [26]:
output_dir = oligo_dir / "Age4060"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [27]:
generate_Q_cool(output_dir, cool_list, "Age4060", "Oligo.4060")

### Oligo Age 4060 downsampled

In [8]:
cell_table_file_name = oligo_dir / "Age4060_DOWN" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age4060_DOWN" / "bulk_contacts" / "Age4060_DOWN.tsv.gz"
if not (oligo_dir / "Age4060_DOWN" / "bulk_contacts").exists():
    (oligo_dir / "Age4060_DOWN" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [15]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [16]:
(oligo_dir / "Age4060_DOWN" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age4060_DOWN" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age4060_DOWN" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age4060_DOWN\t{}\n".format(output_cell_table_file))

In [17]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age4060_DOWN" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age4060_DOWN" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [18]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age4060_DOWN/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age4060_DOWN/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [19]:
with open(oligo_dir / "Age4060_DOWN" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.4060.DOWN",
        oligo_dir / "Age4060_DOWN" / "bulk_impute" / "scripts" / "Oligo.4060.DOWN.out",
        oligo_dir / "Age4060_DOWN" / "bulk_impute" / "scripts" / "Oligo.4060.DOWN.err",
        oligo_dir / "Age4060_DOWN" / "bulk_impute",
        oligo_dir / "Age4060_DOWN" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age4060_DOWN" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [13]:
output_dir = oligo_dir / "Age4060_DOWN"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [14]:
generate_Q_cool(output_dir, cool_list, "Age4060_DOWN", "Oligo.4060.DOWN")

### Oligo Age 6080

In [22]:
cell_table_file_name = oligo_dir / "Age6080" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age6080" / "bulk_contacts" / "Age6080.tsv.gz"
if not (oligo_dir / "Age6080" / "bulk_contacts").exists():
    (oligo_dir / "Age6080" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [23]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [24]:
(oligo_dir / "Age6080" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age6080" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age6080" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age6080\t{}\n".format(output_cell_table_file))

In [25]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age6080" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age6080" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [26]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age6080/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age6080/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [27]:
with open(oligo_dir / "Age6080" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.6080",
        oligo_dir / "Age6080" / "bulk_impute" / "scripts" / "Oligo.6080.out",
        oligo_dir / "Age6080" / "bulk_impute" / "scripts" / "Oligo.6080.err",
        oligo_dir / "Age6080" / "bulk_impute",
        oligo_dir / "Age6080" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age6080" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [28]:
output_dir = oligo_dir / "Age6080"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [29]:
generate_Q_cool(output_dir, cool_list, "Age6080", "Oligo.6080")

### Oligo Age 6080 downsampled

In [30]:
cell_table_file_name = oligo_dir / "Age6080_DOWN" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age6080_DOWN" / "bulk_contacts" / "Age6080_DOWN.tsv.gz"
if not (oligo_dir / "Age6080_DOWN" / "bulk_contacts").exists():
    (oligo_dir / "Age6080_DOWN" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [31]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [32]:
(oligo_dir / "Age6080_DOWN" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age6080_DOWN" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age6080_DOWN" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age6080_DOWN\t{}\n".format(output_cell_table_file))

In [33]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age6080_DOWN" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age6080_DOWN" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [34]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age6080_DOWN/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age6080_DOWN/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [35]:
with open(oligo_dir / "Age6080_DOWN" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.6080.DOWN",
        oligo_dir / "Age6080_DOWN" / "bulk_impute" / "scripts" / "Oligo.6080.DOWN.out",
        oligo_dir / "Age6080_DOWN" / "bulk_impute" / "scripts" / "Oligo.6080.DOWN.err",
        oligo_dir / "Age6080_DOWN" / "bulk_impute",
        oligo_dir / "Age6080_DOWN" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age6080_DOWN" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [15]:
output_dir = oligo_dir / "Age6080_DOWN"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [16]:
generate_Q_cool(output_dir, cool_list, "Age6080_DOWN", "Oligo.6080.DOWN")

### Oligo Age 80100

In [17]:
cell_table_file_name = oligo_dir / "Age80100" / "cell_table.csv"
output_cell_table_file = oligo_dir / "Age80100" / "bulk_contacts" / "Age80100.tsv.gz"
if not (oligo_dir / "Age80100" / "bulk_contacts").exists():
    (oligo_dir / "Age80100" / "bulk_contacts").mkdir(parents=True, exist_ok=True)

In [18]:
create_bulk_contact_file(cell_table_file_name, output_cell_table_file)

In [19]:
(oligo_dir / "Age80100" / "bulk_impute" / "25kb").mkdir(parents=True, exist_ok=True)
(oligo_dir / "Age80100" / "bulk_impute" / "scripts").mkdir(parents=True, exist_ok=True)
with open(oligo_dir / "Age80100" / "bulk_impute" / "contact_table.tsv", "w") as fout:
    fout.write("Age80100\t{}\n".format(output_cell_table_file))

In [20]:
prep_impute_str = prep_impute_str_template.format(
    oligo_dir / "Age80100" / "bulk_impute" / "contact_table.tsv",
    oligo_dir / "Age80100" / "bulk_impute" / "25kb",
    chrom_size_path
)

In [21]:
## paste this to the command line
print(prep_impute_str)

hicluster prepare-impute \
	--cell_table /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age80100/bulk_impute/contact_table.tsv \
	--batch_size 1536 --pad 1 --cpu_per_job 8 \
	--output_dir /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/domain_cluster_age/Oligo/Age80100/bulk_impute/25kb \
	--chrom_size_path /tscc/projects/ps-epigen/users/biy022/scmethylhic/genome/genome_hg38/hg38.autosomal.chrom.sizes \
	--output_dist 10050000 \
	--window_size 30000000 \
	--step_size 10000000 \
	--resolution 25000



In [22]:
with open(oligo_dir / "Age80100" / "bulk_impute" / "scripts" / "sbatch.00001.sh", "w") as fout:
    impute_sbatch_str = impute_sbatch_template.format(
        "Oligo.80100",
        oligo_dir / "Age80100" / "bulk_impute" / "scripts" / "Oligo.80100.out",
        oligo_dir / "Age80100" / "bulk_impute" / "scripts" / "Oligo.80100.err",
        oligo_dir / "Age80100" / "bulk_impute",
        oligo_dir / "Age80100" / "bulk_impute" / "25kb" / "chunk0",
        oligo_dir / "Age80100" / "bulk_impute" / "25kb" / "chunk0" / "Snakefile"
    )
    fout.write(f"{impute_sbatch_str}\n")

In [23]:
output_dir = oligo_dir / "Age80100"
cool_list = list(output_dir.glob("bulk_impute/25kb/chunk*/*cool"))

In [24]:
generate_Q_cool(output_dir, cool_list, "Age80100", "Oligo.80100")