# Create scripts for domain analysis

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from glob import glob
import schicluster

## Load list of cool files and write per-cluster domain-calling scripts

In [2]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory whose sub-directories are searched for cool files.
domain_dir = hh_root.joinpath("Combined/hic/domain")

In [3]:
# Path.glob returns a one-shot generator; materialize it so that re-running
# the loop that consumes `cool_paths` does not silently iterate over nothing.
cool_paths = list(domain_dir.glob("*/*cool"))

In [4]:
# Template for the SLURM preamble of a generated job script, filled in with
# str.format. Positional fields:
#   {0} analysis directory under Combined/hic (also part of the job name)
#   {1} step number            {2} job label / log sub-directory
#   {3} value for -N           {4} value for -c
#   {5} memory in GB (--mem)   {6} hours for -t
#   {7} directory the job changes into
sbatch_header = "".join(
    line + "\n"
    for line in (
        "#! /bin/bash",
        "#SBATCH -p condo",
        "#SBATCH -q condo",
        "#SBATCH -A csd772",
        "#SBATCH -J hh_{0}_s{1}_{2}",
        "#SBATCH -N {3}",
        "#SBATCH -c {4}",
        "#SBATCH --mem {5}G",
        "#SBATCH -t {6}:00:00",
        "#SBATCH -o /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/{2}/step{1}_{2}.out",
        "#SBATCH -e /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/{2}/step{1}_{2}.err",
        "#SBATCH --mail-user biy022@health.ucsd.edu",
        "#SBATCH --mail-type FAIL",
        "",  # blank line between the #SBATCH directives and the shell commands
        "source ~/.bashrc",
        "conda activate schicluster",
        "cd {7}",
    )
)

In [5]:
# For every cool file, write two files into the cool file's own directory:
#   bulk_table.tsv     - one row "<cluster>\t<cool path>"
#   call_domain.sbatch - SLURM script that runs `hicluster domain` on that table
#
# The CPU count is defined once so the `#SBATCH -c` request and the
# `hicluster --cpu` argument cannot drift apart.
n_cpu = 16
for curr_path in cool_paths:
    # Cluster label is the file name up to its first dot.
    cluster = curr_path.name.split(".")[0]
    with open(curr_path.parent / "bulk_table.tsv", "w", encoding="utf-8") as f:
        f.write("{}\t{}\n".format(cluster, curr_path))
    with open(curr_path.parent / "call_domain.sbatch", "w", encoding="utf-8") as f:
        curr_header = sbatch_header.format(
            "domain",          # {0}: analysis directory under Combined/hic
            3,                 # {1}: step number
            cluster,           # {2}: job label / log sub-directory
            1,                 # {3}: -N
            n_cpu,             # {4}: -c
            100,               # {5}: --mem, in GB
            4,                 # {6}: -t, in hours
            curr_path.parent,  # {7}: directory the job changes into
        )
        f.write(curr_header + "\n")
        cmd = (
            "hicluster domain "
            "--cell_table_path {} "
            "--output_prefix {} "
            "--resolution 25000 "
            "--cpu {}"
        )
        cmd = cmd.format(
            curr_path.parent / "bulk_table.tsv",
            curr_path.parent / "{}".format(cluster),
            n_cpu,
        )
        f.write(cmd + "\n")

## Subclass x donor

In [2]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory holding the subclass x donor cool files (two levels deep).
domain_dir = hh_root.joinpath("Combined/hic/domain_subclass_donor")

In [3]:
# Path.glob returns a one-shot generator; materialize it so that re-running
# the loop that consumes `cool_paths` does not silently iterate over nothing.
cool_paths = list(domain_dir.glob("*/*/*cool"))

In [4]:
# Template for the SLURM preamble of a generated job script, filled in with
# str.format. Positional fields:
#   {0} analysis directory under Combined/hic (also part of the job name)
#   {1} step number            {2} job label (job name and log file name)
#   {3} value for -N           {4} value for -c
#   {5} memory in GB (--mem)   {6} hours for -t
#   {7} directory the job changes into
#   {8} log sub-directory below the analysis directory
sbatch_header = "".join(
    line + "\n"
    for line in (
        "#! /bin/bash",
        "#SBATCH -p condo",
        "#SBATCH -q condo",
        "#SBATCH -A csd772",
        "#SBATCH -J hh_{0}_s{1}_{2}",
        "#SBATCH -N {3}",
        "#SBATCH -c {4}",
        "#SBATCH --mem {5}G",
        "#SBATCH -t {6}:00:00",
        "#SBATCH -o /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/{8}/step{1}_{2}.out",
        "#SBATCH -e /tscc/projects/ps-epigen/users/biy022/scmethylhic/human_hippocampus/snm3c/Combined/hic/{0}/{8}/step{1}_{2}.err",
        "#SBATCH --mail-user biy022@health.ucsd.edu",
        "#SBATCH --mail-type FAIL",
        "",  # blank line between the #SBATCH directives and the shell commands
        "source ~/.bashrc",
        "conda activate schicluster",
        "cd {7}",
    )
)

In [5]:
# For every cool file found two directory levels below `domain_dir`, write two
# files into the cool file's own directory:
#   bulk_table.tsv     - one row "<cluster>_<donor>\t<cool path>"
#   call_domain.sbatch - SLURM script that runs `hicluster domain` on that table
#
# The CPU count is defined once so the `#SBATCH -c` request and the
# `hicluster --cpu` argument cannot drift apart.
n_cpu = 8
for curr_path in cool_paths:
    # Labels come from the two directory names directly above the file.
    cluster = curr_path.parts[-3]
    donor = curr_path.parts[-2]
    # Built once and reused for the table row, job name and output prefix.
    label = f"{cluster}_{donor}"
    with open(curr_path.parent / "bulk_table.tsv", "w", encoding="utf-8") as f:
        f.write("{}\t{}\n".format(label, curr_path))
    with open(curr_path.parent / "call_domain.sbatch", "w", encoding="utf-8") as f:
        curr_header = sbatch_header.format(
            "domain_subclass_donor",  # {0}: analysis directory under Combined/hic
            3,                        # {1}: step number
            label,                    # {2}: job label
            1,                        # {3}: -N
            n_cpu,                    # {4}: -c
            50,                       # {5}: --mem, in GB
            4,                        # {6}: -t, in hours
            curr_path.parent,         # {7}: directory the job changes into
            f"{cluster}/{donor}",     # {8}: log sub-directory
        )
        f.write(curr_header + "\n")
        cmd = (
            "hicluster domain "
            "--cell_table_path {} "
            "--output_prefix {} "
            "--resolution 25000 "
            "--cpu {}"
        )
        cmd = cmd.format(
            curr_path.parent / "bulk_table.tsv",
            curr_path.parent / label,
            n_cpu,
        )
        f.write(cmd + "\n")

## Create cell table to combine all subclasses into a single object

In [2]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory whose sub-directories hold the cool files to be tabulated.
domain_dir = hh_root.joinpath("Combined/hic/domain")

In [3]:
# Collect every `.Q.cool` file one directory below `domain_dir` and record it
# in a single table: row label = name of the containing directory,
# value = full path of the cool file.
cool_paths = list(domain_dir.glob("*/*.Q.cool"))
subclass_table = pd.DataFrame(
    {"cool_path": [str(cool) for cool in cool_paths]},
    index=[cool.parent.name for cool in cool_paths],
)
# Written without a header row as "<label>\t<path>" lines.
subclass_table.to_csv(
    domain_dir.joinpath("bulk_table.tsv"),
    sep="\t",
    index=True,
    header=False,
)

In [2]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory whose sub-directories hold the cluster-level cool files.
domain_dir = hh_root.joinpath("Combined/hic/domain_cluster")

In [3]:
# Collect every `.Q.cool` file one directory below `domain_dir` and record it
# in a single table: row label = name of the containing directory,
# value = full path of the cool file.
cool_paths = list(domain_dir.glob("*/*.Q.cool"))
subclass_table = pd.DataFrame(
    {"cool_path": [str(cool) for cool in cool_paths]},
    index=[cool.parent.name for cool in cool_paths],
)
# Written without a header row as "<label>\t<path>" lines.
subclass_table.to_csv(
    domain_dir.joinpath("bulk_table.tsv"),
    sep="\t",
    index=True,
    header=False,
)

In [2]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory holding the cluster x age cool files (two levels deep).
domain_dir = hh_root.joinpath("Combined/hic/domain_cluster_age")

In [3]:
# Collect every `.Q.cool` file two directories below `domain_dir` and record
# it in a single table: row label = the two enclosing directory names joined
# by "_", value = full path of the cool file.
cool_paths = list(domain_dir.glob("*/*/*.Q.cool"))
subclass_table = pd.DataFrame(
    {"cool_path": [str(cool) for cool in cool_paths]},
    index=["_".join(cool.parts[-3:-1]) for cool in cool_paths],
)
# Written without a header row as "<label>\t<path>" lines.
subclass_table.to_csv(
    domain_dir.joinpath("bulk_table.tsv"),
    sep="\t",
    index=True,
    header=False,
)

In [4]:
# Base directory of the scmethylhic project.
root = Path("/tscc/projects/ps-epigen/users/biy022/scmethylhic/")
# snm3c data of the human hippocampus dataset, relative to the project root.
hh_root = root.joinpath("human_hippocampus/snm3c")
# Directory holding the subclass x donor cool files (two levels deep).
domain_dir = hh_root.joinpath("Combined/hic/domain_subclass_donor")

In [5]:
# Collect every `.Q.cool` file two directories below `domain_dir` and record
# it in a single table: row label = the two enclosing directory names joined
# by "_", value = full path of the cool file.
cool_paths = list(domain_dir.glob("*/*/*.Q.cool"))
subclass_table = pd.DataFrame(
    {"cool_path": [str(cool) for cool in cool_paths]},
    index=["_".join(cool.parts[-3:-1]) for cool in cool_paths],
)
# Written without a header row as "<label>\t<path>" lines.
subclass_table.to_csv(
    domain_dir.joinpath("bulk_table.tsv"),
    sep="\t",
    index=True,
    header=False,
)