# Run Fontanka

Instructions
1. Convert `.hic` to ICE-balanced `.mcool` using script `scripts/hic_to_mcool.sh`
2. Select a mask: either the binary fountain mask or a hand-picked aggregate map
3. Run Fontanka

In [1]:
import cooler
import cooltools
import bioframe as bf
import os
import subprocess
import numpy as np
import pandas as pd
from skimage.filters import threshold_li


# Across experiments

In [2]:
# Experiments to process.
# Every input sits in the same download directory and is named
# "<data_name>.ice.mcool", so each entry only spells out (data_name, genome).
experiments = [
    {
        "hic_file": f"/nfs/turbo/umms-minjilab/downloaded_data/{name}.ice.mcool",
        "data_name": name,
        "genome": assembly,
    }
    for name, assembly in [
        # --- successfully generated ---
        ("Repli-HiC_K562_WT_totalS", "hg19"),
        ("DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped", "mm10"),
        ("splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9", "mm9"),
        ("GSE199059_CD69negDPWTR1R2R3R4_merged", "mm9"),
        # --- currently disabled ---
        # ("GM12878_insitu-hic_4DNFI1UEG1HD", "hg38"),
        # ("GM12878_cohesin-SMC1-RAD21-pooled_chiadrop_Kim-2024_4DNFI9JN3S8M_hg38", "hg38"),
        # ("GM12878_CTCF_chiadrop_Kim-2024_4DNFIERR7BI3_hg38", "hg38"),
        # ("GM12878_RNAPII_chiadrop_Kim-2024_4DNFI3ZH8UYR_hg38", "hg38"),
        # ("GM12878_control_chiapet_Kim-2024_GSE158897-GM19239_hg38", "hg38"),
        # ("GM12878_CTCF_chiapet_Kim-2024_4DNFIR5BPZ5L_hg38", "hg38"),
        # ("GM12878_RAD21_chiapet_Kim-2024_4DNFIV9RG6YP_hg38", "hg38"),
        # ("GM12878_RNAPII_chiapet_Kim-2024_4DNFICWBQKM9_hg38", "hg38"),
        # ("HCT116_RAD21-auxin-0hr_hic_Rao-2017_4DNFIP71EWXC_hg38", "hg38"),
        # ("HCT116_RAD21-auxin-6hr_hic_Rao-2017_4DNFILIM6FDL_hg38", "hg38"),
        # ("zebrafish-embryo_sperm_hic_Wike-2021_4DNFI4P145EM_z11", "z11"),
        # ("c-elegans-CA1200-L2-L3-JK07-JK08_control-auxin-1hr_hic_Kim-2023_GSE188849_ce10", "ce10"),
        # ("c-elegans-JK05-L3_SMC3-auxin-1hr_hic_Kim-2023_GSE237663_ce10", "ce10"),
        # ("c-elegans-JK06-L3_WAPL-auxin-1hr_hic_Kim-2023_GSE237663_ce10", "ce10"),
    ]
]

# Parameters shared by every experiment in the batch
num_cores = 4
base_save_dir = "/nfs/turbo/umms-minjilab/sionkim/jet_pred"

resolution = 50_000      # 50 kb
window_size = 6_000_000  # 6 Mb, passed to fontanka via -W

fountain_threshold = 0   # lower bound on FS_peaks: require positive correlation

angle_leniency_deg = 20
angle_leniency_rad = np.radians(angle_leniency_deg)  # value handed to fontanka's -A option


# Run the full Fontanka pipeline for each experiment:
#   expected-cis -> slice-windows -> apply-binary-fountain-mask -> thresholding.
# All outputs are written to `base_save_dir` with a FONTANKA_<data_name> prefix.
for exp in experiments:
    hic_file = exp["hic_file"]
    data_name = exp["data_name"]
    genome = exp["genome"]
    # Every experiment shares one output directory; files are told apart by
    # their FONTANKA_<data_name> prefix.
    save_dir = base_save_dir

    # Load cooler at desired resolution
    clr = cooler.Cooler(f"{hic_file}::resolutions/{resolution}", mode="r")

    # Make sure every chromosome name starts with "chr" (presumably so the
    # cooler agrees with the names bioframe returns -- confirm).
    # Only names that lack the prefix are listed: rename_chroms keeps omitted
    # names as they are, and it writes to the file on disk, so the call is
    # skipped entirely when there is nothing to rename.
    rename_dict = {
        name: f"chr{name}"
        for name in clr.chromnames
        if not name.startswith("chr")
    }
    if rename_dict:
        cooler.rename_chroms(clr, rename_dict)

    chromsizes = bf.fetch_chromsizes(genome)

    try:
        # Not all genomes have centromeres (e.g. ce10)
        cens = bf.fetch_centromeres(genome)

        if cens is None or cens.empty:
            raise ValueError(f"No centromeres found for genome {genome}.")

        # Otherwise, use the centromeres to build arms
        arms = bf.make_chromarms(chromsizes, cens)
    except Exception as err:
        # Best-effort fallback: use whole chromosomes as the view, but say so
        # instead of switching silently.
        print(f" Centromeres unavailable for {genome} ({err}); using whole chromosomes as the view")
        arms = pd.DataFrame({
            "chrom": chromsizes.index,
            "start": 0,
            "end": chromsizes.values
        })

        # Sort the dataframe to exactly match the cooler's chromnames order
        arms["chrom"] = pd.Categorical(arms["chrom"], categories=clr.chromnames, ordered=True)
        arms = arms.sort_values("chrom").reset_index(drop=True)

    # Select only chromosomes present in the cooler
    arms = arms[arms.chrom.isin(clr.chromnames)].reset_index(drop=True)

    # Overwrite the default assignment of the "name" column
    # with genomic string coordinate
    arms["name"] = arms.apply(lambda x: f"{x.chrom}:{x.start}-{x.end}", axis=1)

    # Compute expected cis contact vector
    cvd = cooltools.expected_cis(clr=clr,
                                 view_df=arms,
                                 nproc=num_cores)

    # Save arms and expected vector for fontanka
    arms_save_path = os.path.join(save_dir, f"FONTANKA_{data_name}.arms.tsv")
    arms.to_csv(arms_save_path, sep="\t", index=False, header=False)

    cvd_save_path = os.path.join(save_dir, f"FONTANKA_{data_name}.expected.tsv")
    cvd.to_csv(cvd_save_path, sep="\t", index=False)

    # Extract snips
    snips_path = os.path.join(save_dir, f"FONTANKA_{data_name}.{resolution}.snips.npy")

    cmd = [
        "conda", "run", "-n", "fontanka", # this is needed to run the command in the fontanka conda env
        "fontanka", "slice-windows",
        f"{hic_file}::resolutions/{resolution}",
        snips_path, # this is the output file (i.e. snips)
        "-W", str(window_size),
        "-p", str(num_cores), # number of cores
        "--view", arms_save_path,
        "--expected", cvd_save_path,
    ]

    # check=True: a failing fontanka step aborts the batch instead of
    # continuing with missing or stale files.
    subprocess.run(cmd, check=True)

    # Apply binary fountain mask
    out_path = os.path.join(save_dir, f"FONTANKA_{data_name}.{resolution}.predicted.fountains.tsv")
    mask_cmd = [
        "conda", "run", "-n", "fontanka",
        "fontanka", "apply-binary-fountain-mask",
        f"{hic_file}::resolutions/{resolution}",
        out_path, # this is the output file (fountain calls)
        "-A", str(angle_leniency_rad),
        "-W", str(window_size),
        "-p", str(num_cores),
        "--snips", snips_path,
        "--view", arms_save_path,
    ]
    subprocess.run(mask_cmd, check=True)

    # Read the calls back and drop every row that has a missing value
    results = pd.read_csv(out_path, sep="\t", index_col=0).dropna()

    # We apply the same thresholding scheme as in Fontanka example notebook.
    # `results` has no NaN left at this point, so FS_peaks can be used directly.
    li_threshold = threshold_li(results["FS_peaks"].values)
    peak_threshold = max(fountain_threshold, li_threshold)
    print(f" Using threshold: {peak_threshold}")

    results_thresholded = results.loc[results["FS_peaks"] > peak_threshold].reset_index(drop=True)

    # Insert ".thresholded" before the extension only; str.replace would also
    # rewrite any other ".tsv" occurring earlier in the path.
    out_root, out_ext = os.path.splitext(out_path)
    results_thresholded.to_csv(f"{out_root}.thresholded{out_ext}", sep="\t")

    print(f"Finished processing {data_name} (genome: {genome})")

INFO:root:creating a Pool of 4 workers
  groups = dict(iter(bins.groupby("chrom")[clr_weight_name]))
  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.slice_windows:Slicing /nfs/turbo/umms-minjilab/downloaded_data/Repli-HiC_K562_WT_totalS.ice.mcool::resolutions/50000 with window size: 6000000
INFO:fontanka.lib.utils:Generating stack of snips...
INFO:fontanka.lib.utils:Saving stack into /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.50000.snips.npy...
INFO:fontanka.cli.slice_windows:Finished generating stack, stack shape:(241, 241, 61927) 
INFO:fontanka.cli.slice_windows:Saved stack to /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.50000.

 Using threshold: 1.0031946635254958e-05
Finished processing Repli-HiC_K562_WT_totalS (genome: hg19)


INFO:root:creating a Pool of 4 workers
  groups = dict(iter(bins.groupby("chrom")[clr_weight_name]))
  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.slice_windows:Slicing /nfs/turbo/umms-minjilab/downloaded_data/DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped.ice.mcool::resolutions/50000 with window size: 6000000
INFO:fontanka.lib.utils:Generating stack of snips...
INFO:fontanka.lib.utils:Saving stack into /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped.50000.snips.npy...
INFO:fontanka.cli.slice_windows:Finished generating stack, stack shape:(241, 241, 54521) 
INFO:fontanka.cli.slice_windows:Saved stack to /nfs/turbo/umms-minjilab

 Using threshold: 8.583670781841492e-06
Finished processing DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped (genome: mm10)


  groups = dict(iter(bins.groupby("chrom")[clr_weight_name]))
  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.slice_windows:Slicing /nfs/turbo/umms-minjilab/downloaded_data/splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9.ice.mcool::resolutions/50000 with window size: 6000000
INFO:fontanka.lib.utils:Generating stack of snips...
INFO:fontanka.lib.utils:Saving stack into /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9.50000.snips.npy...
INFO:fontanka.cli.slice_windows:Finished generating stack, stack shape:(241, 241, 53107) 
INFO:fontanka.cli.slice_windows:Saved stack to /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_

 Using threshold: 9.41083105267555e-06
Finished processing splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9 (genome: mm9)


  groups = dict(iter(bins.groupby("chrom")[clr_weight_name]))
  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.slice_windows:Slicing /nfs/turbo/umms-minjilab/downloaded_data/GSE199059_CD69negDPWTR1R2R3R4_merged.ice.mcool::resolutions/50000 with window size: 6000000
INFO:fontanka.lib.utils:Generating stack of snips...
INFO:fontanka.lib.utils:Saving stack into /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_GSE199059_CD69negDPWTR1R2R3R4_merged.50000.snips.npy...
INFO:fontanka.cli.slice_windows:Finished generating stack, stack shape:(241, 241, 53107) 
INFO:fontanka.cli.slice_windows:Saved stack to /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_GSE199059_CD69negDPWTR1R2R3R4_merged.50000.sni

 Using threshold: 8.993036061930985e-06
Finished processing GSE199059_CD69negDPWTR1R2R3R4_merged (genome: mm9)


  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.apply_binary_fountain_mask:Running fountain calling for: /nfs/turbo/umms-minjilab/downloaded_data/GSE199059_CD69negDPWTR1R2R3R4_merged.ice.mcool::resolutions/50000, 
fountain angle:0.34907, window size: 6000000
INFO:fontanka.lib.utils:Reading stack from /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_GSE199059_CD69negDPWTR1R2R3R4_merged.50000.snips.npy...
INFO:fontanka.cli.apply_binary_fountain_mask:Finished generating stack, stack shape:(241, 241, 53107) 
INFO:fontanka.lib.utils:Generating fountain score...
100%|██████████| 53107/53107 [01:07<00:00, 789.92it/s]s]
INFO:fontanka.lib.utils:Generating Scharr score...
100%|██████████| 53107/53107

# Individual

In [2]:
# Settings for running a single experiment step by step
data_name = "Repli-HiC_K562_WT_totalS"
genome = "hg19"
hic_file = "/nfs/turbo/umms-minjilab/downloaded_data/Repli-HiC_K562_WT_totalS.ice.mcool"
save_dir = "/nfs/turbo/umms-minjilab/sionkim/jet_pred"

resolution = 50_000      # 50 kb
window_size = 6_000_000  # 6 Mb

fountain_threshold = 0   # Require positive correlation

### Run fontanka preliminary programs
* `expected-cis` (which we call using cooltools API)
* `fontanka slice-windows` 

In [3]:
# Open the cooler stored under the requested resolution of the .mcool file
clr = cooler.Cooler(
    f"{hic_file}::resolutions/{resolution}",
    mode="r",
)

In [4]:
# Make sure every chromosome name starts with "chr".
# Only names that lack the prefix are listed: rename_chroms keeps omitted
# names as they are.
rename_dict = {
    name: f"chr{name}"
    for name in clr.chromnames
    if not name.startswith("chr")
}

# The renaming is applied in-place (it is written to the file on disk), so
# skip the call entirely when there is nothing to rename.
if rename_dict:
    cooler.rename_chroms(clr, rename_dict)

In [5]:
# Build the genomic view (chromosome arms) used for expected-cis and fontanka
chromsizes = bf.fetch_chromsizes(genome)

try:
    # Not all genomes have centromeres (e.g. ce10)
    cens = bf.fetch_centromeres(genome)

    if cens is None or cens.empty:
        raise ValueError(f"No centromeres found for genome {genome}.")

    # Otherwise, use the centromeres to build arms
    arms = bf.make_chromarms(chromsizes, cens)
except Exception as err:
    # Best-effort fallback: use whole chromosomes as the view, but say so
    # instead of switching silently.
    print(f"Centromeres unavailable for {genome} ({err}); using whole chromosomes as the view")
    arms = pd.DataFrame({
        "chrom": chromsizes.index,
        "start": 0,
        "end": chromsizes.values
    })

    # Sort the dataframe to exactly match the cooler's chromnames order
    # (chromosomes absent from the cooler sort last and are dropped below).
    arms["chrom"] = pd.Categorical(arms["chrom"], categories=clr.chromnames, ordered=True)
    arms = arms.sort_values("chrom").reset_index(drop=True)

# Select only chromosomes present in the cooler
arms = arms[arms.chrom.isin(clr.chromnames)].reset_index(drop=True)

# Overwrite the default assignment of the "name" column
# with genomic string coordinate
arms["name"] = arms.apply(lambda x: f"{x.chrom}:{x.start}-{x.end}", axis=1)

arms

Unnamed: 0,chrom,start,end,name
0,chr1,0,125000000,chr1:0-125000000
1,chr1,125000000,249250621,chr1:125000000-249250621
2,chr2,0,93300000,chr2:0-93300000
3,chr2,93300000,243199373,chr2:93300000-243199373
4,chr3,0,91000000,chr3:0-91000000
5,chr3,91000000,198022430,chr3:91000000-198022430
6,chr4,0,50400000,chr4:0-50400000
7,chr4,50400000,191154276,chr4:50400000-191154276
8,chr5,0,48400000,chr5:0-48400000
9,chr5,48400000,180915260,chr5:48400000-180915260


In [6]:
# Expected cis contact vector over the `arms` view, used for OE normalization
cvd = cooltools.expected_cis(clr=clr, view_df=arms, nproc=4)

INFO:root:creating a Pool of 4 workers
  groups = dict(iter(bins.groupby("chrom")[clr_weight_name]))


In [7]:
# fontanka takes the view (chromosome arms) and the expected vector as .tsv files
arms_save_path = os.path.join(save_dir, f"FONTANKA_{data_name}.arms.tsv")
cvd_save_path = os.path.join(save_dir, f"FONTANKA_{data_name}.expected.tsv")

# The arms table is written without a header row; the expected table keeps its header
arms.to_csv(arms_save_path, sep="\t", header=False, index=False)
cvd.to_csv(cvd_save_path, sep="\t", index=False)

In [8]:
# Output file for the snips produced by `fontanka slice-windows`
snips_path = os.path.join(save_dir, f"FONTANKA_{data_name}.{resolution}.snips.npy")

# `conda run -n fontanka` is needed to run the command in the fontanka conda env
cmd = ["conda", "run", "-n", "fontanka", "fontanka", "slice-windows"]
cmd += [f"{hic_file}::resolutions/{resolution}", snips_path]  # input cooler, output snips
cmd += ["-W", str(window_size)]
cmd += ["-p", "4"]  # number of cores
cmd += ["--view", arms_save_path]
cmd += ["--expected", cvd_save_path]

subprocess.run(cmd, check=True)

KeyboardInterrupt: 

### Choose mask and run main Fontanka program

* Binary mask
* Aggregate map

In [21]:
# Angle leniency: 20 degrees, to mirror 80 - 100 for miajet; converted to radians for -A
angle_leniency_deg = 20
angle_leniency_rad = np.radians(angle_leniency_deg)

# Output table written by `fontanka apply-binary-fountain-mask`
out_path = os.path.join(save_dir, f"FONTANKA_{data_name}.{resolution}.predicted.fountains.tsv")

# First try binary mask.
# `conda run -n fontanka` is needed to run the command in the fontanka conda env
cmd = ["conda", "run", "-n", "fontanka", "fontanka", "apply-binary-fountain-mask"]
cmd += [f"{hic_file}::resolutions/{resolution}", out_path]  # input cooler, output table
cmd += ["-A", str(angle_leniency_rad)]  # angle leniency in radians
cmd += ["-W", str(window_size)]
cmd += ["-p", "4"]  # number of cores
cmd += ["--snips", snips_path]
cmd += ["--view", arms_save_path]

subprocess.run(cmd, check=True)

  self[key]
  value = self[key]
  del self[key]
  mcm.cmap_d['Grays'] = mcm.cmap_d.pop('Greys')
  mcm.cmap_d['Spectral'] = mcm.cmap_d['Spectral'].reversed(name='Spectral')
  _cmap = mcm.cmap_d.get(_name, None)
  mcm.cmap_d[_name] = LinearSegmentedColormap.from_list(
  for key in self._mapping:
  yield (key, self._mapping[key])
INFO:fontanka.cli.apply_binary_fountain_mask:Running fountain calling for: /nfs/turbo/umms-minjilab/downloaded_data/Repli-HiC_K562_WT_totalS.ice.mcool::resolutions/50000, 
fountain angle:0.34907, window size: 6000000
INFO:fontanka.lib.utils:Reading stack from /nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.50000.snips.npy...
INFO:fontanka.cli.apply_binary_fountain_mask:Finished generating stack, stack shape:(241, 241, 61927) 
INFO:fontanka.lib.utils:Generating fountain score...
100%|██████████| 61927/61927 [01:20<00:00, 768.28it/s]]]
INFO:fontanka.lib.utils:Generating Scharr score...
100%|██████████| 61927/61927 [06:07<00:00, 168.34it/

CompletedProcess(args=['conda', 'run', '-n', 'fontanka', 'fontanka', 'apply-binary-fountain-mask', '/nfs/turbo/umms-minjilab/downloaded_data/Repli-HiC_K562_WT_totalS.ice.mcool::resolutions/50000', '/nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.50000.predicted.fountains.tsv', '-A', '0.3490658503988659', '-W', '6000000', '-p', '4', '--snips', '/nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.50000.snips.npy', '--view', '/nfs/turbo/umms-minjilab/sionkim/jet_pred/FONTANKA_Repli-HiC_K562_WT_totalS.arms.tsv'], returncode=0)

In [22]:
# Read the results back in and apply a threshold so that the comparison is fair

In [42]:
# Load the tab-separated table written to `out_path`; its first column becomes the index
results = pd.read_csv(
    out_path,
    sep="\t",
    index_col=0,
)
results

Unnamed: 0,chrom,start,end,window_start,window_end,FS,FS_peaks,Scharr,Scharr_box
0,chr1,0,50000,-6000000,6050000,,,,
1,chr1,50000,100000,-5950000,6100000,,,,
2,chr1,100000,150000,-5900000,6150000,,,,
3,chr1,150000,200000,-5850000,6200000,,,,
4,chr1,200000,250000,-5800000,6250000,,,,
...,...,...,...,...,...,...,...,...,...
61922,chrY,59150000,59200000,53150000,65200000,,,,
61923,chrY,59200000,59250000,53200000,65250000,,,,
61924,chrY,59250000,59300000,53250000,65300000,,,,
61925,chrY,59300000,59350000,53300000,65350000,,,,


In [43]:
# Keep only the rows in which every column has a value
results = results.dropna(axis=0, how="any")

In [44]:
# Same thresholding scheme as the Fontanka example notebook: Li's threshold on
# the FS_peaks values, never going below `fountain_threshold`.
li_threshold = threshold_li(results["FS_peaks"].dropna().to_numpy())
peak_threshold = max(fountain_threshold, li_threshold)
print(f"Using threshold: {peak_threshold}")

# Keep the rows whose FS_peaks is strictly above the threshold
results_thresholded = results[results["FS_peaks"].gt(peak_threshold)].reset_index(drop=True)

Using threshold: 1.0031946635254958e-05


In [46]:
# Write the thresholded calls next to the raw output. ".thresholded" is inserted
# before the extension only; str.replace would also rewrite any other ".tsv"
# occurring earlier in the path.
out_root, out_ext = os.path.splitext(out_path)
results_thresholded.to_csv(f"{out_root}.thresholded{out_ext}", sep="\t")