# Run Fun


Example

`python ../Fun/Fun calculate-son-score --help`

In [1]:
import bioframe as bf
import subprocess
import cooler
import os

import yaml                                             


# Across experiments

In [None]:
# Load the per-sample settings (mcool path, genome, resolution, window size)
# from the shared submission config.
with open("../submit_all_config.yaml", "r") as f:
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so the lookups below do not raise AttributeError.
    cfg = yaml.safe_load(f) or {}

# Build one record per sample. A `samples:` key that is absent or present but
# empty (parsed as None) simply yields no experiments.
experiments = []
for sample_name, sample_cfg in (cfg.get("samples") or {}).items():
    mcool_path = sample_cfg.get("mcool")
    if not mcool_path:
        # Without an mcool path the cooler URI would become
        # "None::resolutions/<res>" and fail later with a confusing error.
        print(f"Skipping {sample_name}: no 'mcool' path in config")
        continue
    experiments.append(
        {
            "hic_file": mcool_path,
            "data_name": sample_name,
            "genome": sample_cfg["genome"],
            "res": int(sample_cfg["res"]),
            "win": int(sample_cfg["win"]),
        }
    )


# Shared parameters (identical for every experiment).
# The resolution-dependent values that used to be fixed here (ext_length,
# offset, interval_length, max_merge_distance) are now derived per experiment
# inside the loop; the old definitions are kept commented out for reference.
# resolution = 50000      # 50 kb
# ext_length = 2500000  # 2.5 Mb # half of window size
coverage_ratio = 0 # sparse region threshold (default option)
padding_width = 2 # if 2, then 2 bin on left and 2 bin on right (total 5 bin width including center)
# offset = 5 * resolution
# interval_length = 5 * resolution # ensure it's ~5-fold of resolution as in example
# max_merge_distance = 5 * resolution
p_value = 0.05 # passed to find-fountains as --p_value
# HARD-CODED PARAMETER (in the find-fountains command): "--extension_pixels", "10", "100", "5",
# HARD-CODED PARAMETER (in the find-fountains command): "--signal_noise_background", "1.1", "1.2", "1.3", "1.4", "1.5"
base_save_dir = "/nfs/turbo/umms-minjilab/sionkim/jet_pred" # each experiment writes to <base_save_dir>/FUN_<data_name>

# Command prefix shared by every FUN sub-command: `conda run` executes the FUN
# script inside the `fun-env` conda environment.
fun_cmd_prefix = [
    "conda", "run", "-n", "fun-env",
    "python", "/nfs/turbo/umms-minjilab/sionkim/Fun/Fun",
]

# Run the three FUN stages (calculate-son-score -> generate-summits ->
# find-fountains) for every experiment. Each stage is run with check=True, so
# the first failing command raises CalledProcessError and stops the loop.
for exp in experiments:
    hic_file = exp["hic_file"]
    data_name = exp["data_name"]
    genome = exp["genome"]
    resolution = exp["res"]
    window_size = exp["win"]
    buffer = window_size // 6

    # Half of what remains of the window after removing the buffer,
    # e.g. 2.5 Mb for a 6 Mb window (buffer = 1 Mb).
    # NOTE(review): this value is not guaranteed to be a multiple of
    # `resolution` (a previous run used 54167 at 5 kb and find-fountains
    # exited non-zero) -- confirm whether FUN requires bin-aligned lengths.
    ext_length = (window_size - buffer) // 2
    offset = 5 * resolution
    interval_length = 5 * resolution  # ~5-fold of resolution, as in the FUN example
    max_merge_distance = 5 * resolution

    print(ext_length)

    save_dir = os.path.join(base_save_dir, f"FUN_{data_name}")
    os.makedirs(save_dir, exist_ok=True)

    # Save the chromsizes to a file to give to FUN program
    chromsizes = bf.fetch_chromsizes(genome)
    f_chromsizes = f"{save_dir}/{genome}.chrom.sizes"
    chromsizes.to_csv(f_chromsizes, sep="\t", header=False)

    # URI of the single-resolution cooler inside the .mcool file
    cool_uri = f"{hic_file}::resolutions/{resolution}"

    # Rename chromosomes because FUN requires "1" "2" "3" not "chr1" "chr2" "chr3"
    clr = cooler.Cooler(cool_uri, mode="r")

    # Remove only a literal leading "chr". str.lstrip("chr") would strip any
    # run of the characters c/h/r and mangle names that merely start with one
    # of those letters (e.g. on a re-run, after the prefix is already gone).
    rename_dict = {
        c: c[len("chr"):] for c in clr.chromnames if c.startswith("chr")
    }
    # rename_chroms rewrites the file in place; skip it when nothing changes.
    if rename_dict:
        cooler.rename_chroms(clr, rename_dict)

    # Stage 1: compute the SoN score track
    cmd = fun_cmd_prefix + [
        "calculate-son-score",  # program name
        cool_uri,
        "--out_dir", save_dir,
        "--norm", "weight",
        "--use_mean", "True",
        "--coverage_ratio", f"{coverage_ratio}",
        "--chromsize_path", f_chromsizes,
        "--ext_length", f"{ext_length}",
        "--padding_width", f"{padding_width}",  # bins on each side of the center bin
        "--offset", f"{offset}",
    ]
    subprocess.run(cmd, check=True)

    f_merged_bedgraph = f"{save_dir}/SoN_track_{resolution}/SoN_{resolution}_merged.bedgraph"

    # Stage 2: call summits on the merged SoN track
    cmd = fun_cmd_prefix + [
        "generate-summits",  # program name
        cool_uri,
        "--track", f_merged_bedgraph,
        "--out_dir", save_dir,
    ]
    subprocess.run(cmd, check=True)

    f_merged_summit = f"{save_dir}/SoN_summits/Summits_{resolution}_merged.bed"
    f_output = f"{save_dir}/FUN-pred_{resolution}"

    # Stage 3: identify fountains at the summits
    cmd = fun_cmd_prefix + [
        "find-fountains",  # program name
        cool_uri,
        "--ext_length", f"{ext_length}",
        "--norm", "weight",
        "--offset", f"{offset}",  # ignore regions too close to the main diagonal (5 bins)
        "--coverage_ratio", f"{coverage_ratio}",
        "--half_width", f"{padding_width}",  # same value as --padding_width above
        "--region_path", f_merged_summit,  # path to summits
        "--extension_pixels", "10", "100", "5",  # identical to script provided
        "--interval_length", f"{interval_length}",
        "--max_merge_distance", f"{max_merge_distance}",
        "--output", f_output,
        "--p_value", f"{p_value}",
        "--signal_noise_background", "1.1", "1.2", "1.3", "1.4", "1.5",  # [1.1, 1.2, 1.3, 1.4, 1.5]
    ]
    subprocess.run(cmd, check=True)


54167


INFO:cli.calculate_SoN:Starting SoN calculation...
INFO:cli.calculate_SoN:Loading cooler object from /nfs/turbo/umms-minjilab/downloaded_data/c-elegans-CA1200-L2-L3-JK07-JK08_control-auxin-1hr_hic_Kim-2023_GSE188849_ce10.ice.mcool::resolutions/5000...
INFO:cli.calculate_SoN:Created directory for SoN tracks: /nfs/turbo/umms-minjilab/sionkim/jet_pred/FUN_c_elegans_CA1200_L2_L3_JK07_JK08_control_auxin_1hr_hic_Kim-2023_GSE188849_ce10/SoN_track_5000
INFO:cli.calculate_SoN:Processing chromosome I...
INFO:cli.calculate_SoN:SoN track for chromosome I written to /nfs/turbo/umms-minjilab/sionkim/jet_pred/FUN_c_elegans_CA1200_L2_L3_JK07_JK08_control_auxin_1hr_hic_Kim-2023_GSE188849_ce10/SoN_track_5000/chrI_5000_SoN.bedgraph
INFO:cli.calculate_SoN:Processing chromosome II...
INFO:cli.calculate_SoN:SoN track for chromosome II written to /nfs/turbo/umms-minjilab/sionkim/jet_pred/FUN_c_elegans_CA1200_L2_L3_JK07_JK08_control_auxin_1hr_hic_Kim-2023_GSE188849_ce10/SoN_track_5000/chrII_5000_SoN.bedgraph


CalledProcessError: Command '['conda', 'run', '-n', 'fun-env', 'python', '/nfs/turbo/umms-minjilab/sionkim/Fun/Fun', 'find-fountains', '/nfs/turbo/umms-minjilab/downloaded_data/c-elegans-CA1200-L2-L3-JK07-JK08_control-auxin-1hr_hic_Kim-2023_GSE188849_ce10.ice.mcool::resolutions/5000', '--ext_length', '54167', '--norm', 'weight', '--offset', '25000', '--coverage_ratio', '0', '--half_width', '2', '--region_path', '/nfs/turbo/umms-minjilab/sionkim/jet_pred/FUN_c_elegans_CA1200_L2_L3_JK07_JK08_control_auxin_1hr_hic_Kim-2023_GSE188849_ce10/SoN_summits/Summits_5000_merged.bed', '--extension_pixels', '10', '100', '5', '--interval_length', '25000', '--max_merge_distance', '25000', '--output', '/nfs/turbo/umms-minjilab/sionkim/jet_pred/FUN_c_elegans_CA1200_L2_L3_JK07_JK08_control_auxin_1hr_hic_Kim-2023_GSE188849_ce10/FUN-pred_5000', '--p_value', '0.05', '--signal_noise_background', '1.1', '1.2', '1.3', '1.4', '1.5']' returned non-zero exit status 1.

In [None]:
# experiments = [
#     {
#         "hic_file" : "/nfs/turbo/umms-minjilab/downloaded_data/mESC_CTCF-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10.mcool",
#         "data_name" : "mESC_CTCF-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10",
#         "genome" : "mm10",
#     },
#     {
#         "hic_file" : "/nfs/turbo/umms-minjilab/downloaded_data/mESC_RAD21-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10.mcool",
#         "data_name" : "mESC_RAD21-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10",
#         "genome" : "mm10",
#     },
#     {
#         "hic_file" : "/nfs/turbo/umms-minjilab/downloaded_data/mESC_WAPL-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10.mcool",
#         "data_name" : "mESC_WAPL-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10",
#         "genome" : "mm10",
#     },
#     {
#         "hic_file" : "/nfs/turbo/umms-minjilab/downloaded_data/mESC_YY1-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10.mcool",
#         "data_name" : "mESC_YY1-auxin-3hr_microc_Hsieh-2022_GSE178982_mm10",
#         "genome" : "mm10",
#     },
#     {
#         "hic_file" : "/nfs/turbo/umms-minjilab/downloaded_data/GSE130275_mESC_WT_combined_2.6B.ice.mcool", # micro-C
#         "data_name" : "GSE130275_mESC_WT_combined_2.6B",
#         "genome" : "mm10",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/Repli-HiC_K562_WT_totalS.ice.mcool",
#         "data_name": "Repli-HiC_K562_WT_totalS",
#         "genome": "hg19",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped.ice.mcool",
#         "data_name": "DP-thymocytes_WT_hic_Guo-2022_GSE199059_mm10-remapped",
#         "genome": "mm10",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9.ice.mcool",
#         "data_name": "splenic-B-cell_WT_insitu-hic_Kieffer-Kwon-2018_GSE82144_mm9",
#         "genome": "mm9",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GSE199059_CD69negDPWTR1R2R3R4_merged.ice.mcool",
#         "data_name": "GSE199059_CD69negDPWTR1R2R3R4_merged",
#         "genome": "mm9",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_insitu-hic_4DNFI1UEG1HD.ice.mcool",
#         "data_name": "GM12878_insitu-hic_4DNFI1UEG1HD",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_cohesin-SMC1-RAD21-pooled_chiadrop_Kim-2024_4DNFI9JN3S8M_hg38.ice.mcool",
#         "data_name": "GM12878_cohesin-SMC1-RAD21-pooled_chiadrop_Kim-2024_4DNFI9JN3S8M_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_CTCF_chiadrop_Kim-2024_4DNFIERR7BI3_hg38.ice.mcool",
#         "data_name": "GM12878_CTCF_chiadrop_Kim-2024_4DNFIERR7BI3_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_RNAPII_chiadrop_Kim-2024_4DNFI3ZH8UYR_hg38.ice.mcool",
#         "data_name": "GM12878_RNAPII_chiadrop_Kim-2024_4DNFI3ZH8UYR_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_control_chiapet_Kim-2024_GSE158897-GM19239_hg38.ice.mcool",
#         "data_name": "GM12878_control_chiapet_Kim-2024_GSE158897-GM19239_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_CTCF_chiapet_Kim-2024_4DNFIR5BPZ5L_hg38.ice.mcool",
#         "data_name": "GM12878_CTCF_chiapet_Kim-2024_4DNFIR5BPZ5L_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_RAD21_chiapet_Kim-2024_4DNFIV9RG6YP_hg38.ice.mcool",
#         "data_name": "GM12878_RAD21_chiapet_Kim-2024_4DNFIV9RG6YP_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_RNAPII_chiapet_Kim-2024_4DNFICWBQKM9_hg38.ice.mcool",
#         "data_name": "GM12878_RNAPII_chiapet_Kim-2024_4DNFICWBQKM9_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/HCT116_RAD21-auxin-0hr_hic_Rao-2017_4DNFIP71EWXC_hg38.ice.mcool",
#         "data_name": "HCT116_RAD21-auxin-0hr_hic_Rao-2017_4DNFIP71EWXC_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/HCT116_RAD21-auxin-6hr_hic_Rao-2017_4DNFILIM6FDL_hg38.ice.mcool",
#         "data_name": "HCT116_RAD21-auxin-6hr_hic_Rao-2017_4DNFILIM6FDL_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/HCT116_RAD21-auxin-6hr_intacthic_Guckelberger-2024_ENCFF461RFV_hg38.ice.mcool",
#         "data_name": "HCT116_RAD21-auxin-6hr_intacthic_Guckelberger-2024_ENCFF461RFV_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/HCT116_RAD21-auxin-0hr_intacthic_Guckelberger-2024_ENCFF109GNA_hg38.ice.mcool",
#         "data_name": "HCT116_RAD21-auxin-0hr_intacthic_Guckelberger-2024_ENCFF109GNA_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/K562_hic_Rao-2014_4DNFI2R1W3YW_hg38.ice.mcool",
#         "data_name": "K562_hic_Rao-2014_4DNFI2R1W3YW_hg38",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/zebrafish-embryo_sperm_hic_Wike-2021_4DNFI4P145EM_z11.ice.mcool",
#         "data_name": "zebrafish-embryo_sperm_hic_Wike-2021_4DNFI4P145EM_z11",
#         "genome": "danRer11", 
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/c-elegans-CA1200-L2-L3-JK07-JK08_control-auxin-1hr_hic_Kim-2023_GSE188849_ce10.ice.mcool",
#         "data_name": "c-elegans-CA1200-L2-L3-JK07-JK08_control-auxin-1hr_hic_Kim-2023_GSE188849_ce10",
#         "genome": "ce10",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/c-elegans-JK05-L3_SMC3-auxin-1hr_hic_Kim-2023_GSE237663_ce10.ice.mcool",
#         "data_name": "c-elegans-JK05-L3_SMC3-auxin-1hr_hic_Kim-2023_GSE237663_ce10",
#         "genome": "ce10",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/c-elegans-JK06-L3_WAPL-auxin-1hr_hic_Kim-2023_GSE237663_ce10.ice.mcool",
#         "data_name": "c-elegans-JK06-L3_WAPL-auxin-1hr_hic_Kim-2023_GSE237663_ce10",
#         "genome": "ce10",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/GM12878_intacthic_ENCFF785BPC.ice.mcool",
#         "data_name": "GM12878_intacthic_ENCFF785BPC",
#         "genome": "hg38",
#     },
#     { # SUCCESSFULLY GENERATED
#         "hic_file": "/nfs/turbo/umms-minjilab/downloaded_data/K562_intacthic_ENCODE-2023_ENCFF808MAG_hg38.ice.mcool",
#         "data_name": "K562_intacthic_ENCODE-2023_ENCFF808MAG_hg38",
#         "genome": "hg38",
#     },
# ]