In [1]:
import configparser
import os
import subprocess

In [2]:
# Name of the INI file this notebook generates (relative to the working directory).
configfile_name = 'config_landscape.ini'

In [3]:
# Parser that accumulates every section written to the config file.
# allow_no_value=True lets options exist without a value.
config = configparser.ConfigParser(
    allow_no_value=True,
)

In [4]:
# Show the working directory: configfile_name is a relative path, so the
# generated config file is written here.
os.getcwd()

'/gpfs51/dors2/capra_lab/users/fongsl/enh_ages/landscape'

# Paths 

In [5]:
# Root directories referenced by the rest of the notebook.
PROJECT_PATH = "/dors/capra_lab/projects/enhancer_ages/"
DORS_PATH = "/dors/capra_lab/data/"
BIN_PATH = "/dors/capra_lab/users/fongsl/enh_ages/landscape/"

# [PATHS] section: one option per root directory.
config["PATHS"] = dict(
    PROJECT=PROJECT_PATH,
    DORS=DORS_PATH,
    BIN=BIN_PATH,
)

In [6]:
# [MATPLOTLIB] section: path to config_plot.py under BIN_PATH.
# The section is assigned as a whole (like the other sections in this notebook)
# instead of calling add_section, which raises DuplicateSectionError when the
# cell is executed a second time.
config["MATPLOTLIB"] = {
    "MODULE": os.path.join(BIN_PATH, "config_plot.py"),
}

## ENCODE paths 

In [7]:
# Directory of ENCODE TF data under the shared data root.
ENCODEPATH = os.path.join(DORS_PATH, "encode/encode3_hg38/TF/")

# Directory of ENCODE data under the project root.
ENHBASE = os.path.join(PROJECT_PATH, "encode/data/")

# FANTOM fraction, length, and age enrichment 

In [8]:
# Genome build recorded for the FANTOM data set.
GENOME_BUILD = "hg19"

# FANTOM project directory and the directory of its scripts.
FANTOMPATH = os.path.join(PROJECT_PATH, "fantom")
FANTOM_BIN = os.path.join(BIN_PATH, "fantom_hg19")

# Data directory and the .bed file inside its "ages" sub-directory.
FANTOMDATA = os.path.join(FANTOMPATH, "data", "non-genic", "no-exon_all_fantom_enh")
FANTOMFILE = os.path.join(FANTOMDATA, "ages", "syn_breaks_no-exon_all_fantom_enh_ages.bed")

# [FANTOM] section.
# NOTE(review): "fanton" in SAMPLE_ID looks like a typo for "fantom"; it is kept
# unchanged because anything reading this ID would see a different value — confirm.
config["FANTOM"] = dict(
    GENOME_BUILD=GENOME_BUILD,
    DATA_PATH=FANTOMDATA,
    DATA_FILE=FANTOMFILE,
    SAMPLE_ID=f"all_fanton_enh_{GENOME_BUILD}",
    RESULTS=os.path.join(FANTOMPATH, "results"),
    BIN_PATH=FANTOM_BIN,
)

# cCRE fraction, length, and age enrichment 

In [9]:
# Register one [ENCODE_<cell line>_FEATURES] section per cell line.
CLs = ["HepG2", "K562"]

for CL in CLs:
    GENOME_BUILD = "hg38"

    # Directory of this cell line's scripts.
    #  * The components carry no leading "/": os.path.join discards everything
    #    before an absolute component, so "/ENCODE3_cCRE/..." dropped the root.
    #  * The root is read from config["PATHS"]["BIN"] and the result is bound to
    #    its own name, so this cell neither depends on nor overwrites BIN_PATH
    #    and gives the same result on every iteration and every re-run.
    CL_BIN_PATH = os.path.join(config["PATHS"]["BIN"], "ENCODE3_cCRE", CL)

    # cCRE .bed file for this cell line.
    cCREPATH = f"/dors/capra_lab/projects/enhancer_ages/encode/data/ELS_combined_{CL}/ages/"
    cCREFILE = os.path.join(cCREPATH, f"syn_breaks_ELS_combined_{CL}_ages.bed")

    # Matching shuffled .bed file.
    SHUFPATH = f"/dors/capra_lab/projects/enhancer_ages/encode/data/ELS_combined_{CL}/shuffle/ages/"
    SHUFFILE = os.path.join(SHUFPATH, f"syn_breaks_shuf-ELS_combined_{CL}.bed")

    # Results directory for this cell line.
    RE = f"/dors/capra_lab/projects/enhancer_ages/landscape/results/cCRE/{CL}/"

    config[f"ENCODE_{CL}_FEATURES"] = {
        "BIN": os.path.join(CL_BIN_PATH, f"Figure2_AB_Figure1-{CL}_cCRE.ipynb"),
        "DATA_PATH": cCREPATH,
        "DATA_FILE": cCREFILE,
        "SHUFFLE_PATH": SHUFPATH,
        "SHUFFLE_FILE": SHUFFILE,
        "RESULTS": RE,
        "STAT_FILE": os.path.join(RE, f"stat_{CL}_features.txt"),
        "GENOME_BUILD": GENOME_BUILD,
    }

# Zero TFBS overlap v. Non-zero TFBS overlap

In [10]:
 config["ZEROS"] = {
                    "RESULTS_HepG2": os.path.join(PROJECT_PATH, "landscape/results/cCRE_x_tfbs_encode3/HepG2/"),
                     "RESULTS_K562": os.path.join(PROJECT_PATH, "landscape/results/cCRE_x_tfbs_encode3/K562/"),
                    #"K562":
                    #"BIN":
                     }

In [11]:
# [ENCODE] section, filled option by option.
config["ENCODE"] = {}

# The "#" option carries a human-readable note into the written file.
config["ENCODE"]["#"] = "Paths to  encode cCREs, encode TFs"
config["ENCODE"]["PATH_CCRE"] = ENHBASE
config["ENCODE"]["PATH_TF"] = ENCODEPATH

# HepG2 TFBS density 

In [12]:
CL = "HepG2"

# Directory of this cell line's scripts.
#  * The components carry no leading "/": os.path.join discards everything
#    before an absolute component, so "/ENCODE3_cCRE/..." dropped the root.
#  * The root is read from config["PATHS"]["BIN"] and the result is bound to its
#    own name, so this cell neither depends on nor overwrites BIN_PATH and can
#    be re-run safely.
CL_BIN_PATH = os.path.join(config["PATHS"]["BIN"], "ENCODE3_cCRE", CL)

# Note stored under the "#" option of the section (text kept exactly as before,
# including the whitespace introduced by the line continuations).
COMMENT = f"Path to {CL} cCREs,\
    TFBS_DENSITY_CORE_V_DERIVED evaluates TFBS in {CL} TFBS density (Figure 4B),\
    TFBS_ENRICHMENT looks at enrichment of TFBS in core v. derived sequences of the same age (Figure 4c)"

# Results directory shared by the RESULTS and RESULTS_DATA options.
TFBS_RESULTS = os.path.join(PROJECT_PATH, f"landscape/results/cCRE_x_tfbs_encode3/{CL}/")

config[f"ENCODE_{CL}_TFBS"] = {
    "#": COMMENT,
    "BIN": CL_BIN_PATH,
    "RESULTS": TFBS_RESULTS,
    "RESULTS_DATA": os.path.join(PROJECT_PATH, f"landscape/results/cCRE_x_tfbs_encode3/{CL}/data/"),
    # Notebook for the TFBS density analysis (Figure 4B per COMMENT).
    "TFBS_DENSITY_CORE_V_DERIVED": os.path.join(CL_BIN_PATH, f"Fig4AB-TFBS_density_{CL}.ipynb"),
    # Notebook for the core v. derived enrichment analysis (Figure 4c per COMMENT).
    "TFBS_ENRICHMENT": os.path.join(CL_BIN_PATH, "Fig4C_core_v_der_TFBS_enrichment.ipynb"),
}

# K562 TFBS density

In [13]:
CL = "K562"

# Directory of this cell line's scripts.
#  * The components carry no leading "/": os.path.join discards everything
#    before an absolute component, so "/ENCODE3_cCRE/..." dropped the root.
#  * The root is read from config["PATHS"]["BIN"] and the result is bound to its
#    own name, so this cell neither depends on nor overwrites BIN_PATH and can
#    be re-run safely.
CL_BIN_PATH = os.path.join(config["PATHS"]["BIN"], "ENCODE3_cCRE", CL)

# Note stored under the "#" option of the section (text kept exactly as before,
# including the whitespace introduced by the line continuations).
COMMENT = f"Path to {CL} cCREs,\
                            TFBS_DENSITY_CORE_V_DERIVED evaluates TFBS in {CL} TFBS density (Figure 4B),\
                            TFBS_ENRICHMENT looks at enrichment of TFBS in core v. derived sequences of the same age (Figure 4c)"

# Results directory shared by the RESULTS and RESULTS_DATA options.
TFBS_RESULTS = os.path.join(PROJECT_PATH, f"landscape/results/cCRE_x_tfbs_encode3/{CL}/")

config[f"ENCODE_{CL}_TFBS"] = {
    "#": COMMENT,
    "BIN": CL_BIN_PATH,
    "RESULTS": TFBS_RESULTS,
    "RESULTS_DATA": os.path.join(PROJECT_PATH, f"landscape/results/cCRE_x_tfbs_encode3/{CL}/data/"),
    # Notebook for the TFBS density analysis (Figure 4B per COMMENT).
    "TFBS_DENSITY_CORE_V_DERIVED": os.path.join(CL_BIN_PATH, f"Fig4AB-TFBS_density_{CL}.ipynb"),
    # Notebook for the core v. derived enrichment analysis (Figure 4c per COMMENT).
    "TFBS_ENRICHMENT": os.path.join(CL_BIN_PATH, "Fig4C_core_v_der_TFBS_enrichment.ipynb"),
}

# Ernst 2016

In [14]:
# Directory of the ernst_2016 scripts.
# Built from config["PATHS"]["BIN"] and bound to its own name: joining BIN_PATH
# onto itself appended "ernst_2016" again on every re-run and inherited whatever
# value BIN_PATH held when the cell was executed.
# NOTE(review): assumes ernst_2016/ sits directly under the scripts root — confirm.
MPRA_BIN_PATH = os.path.join(config["PATHS"]["BIN"], "ernst_2016")

# [MPRA] section.
config["MPRA"] = {
    "BIN": os.path.join(MPRA_BIN_PATH, "figure5_activity.ipynb"),
}

# GC_CONTENT 

In [15]:
# [GC_CONTENT] section: GC-content notebook located under FANTOM_BIN.
config["GC_CONTENT"] = dict(
    FANTOM=os.path.join(FANTOM_BIN, "FigS-GC_content.ipynb"),
)

In [16]:
# Write every section held by `config` to configfile_name.
# The with-block closes the file on exit, so no explicit close() is needed.
with open(configfile_name, "w") as configfile:
    config.write(configfile)

In [17]:
# Export the notebook named NAME as a plain script via `jupyter nbconvert`.
NAME = "make_table"
cmd = f"jupyter nbconvert --to script {NAME}.ipynb"

# subprocess.call returns the exit status instead of raising, so a failed
# conversion would otherwise pass unnoticed; report it explicitly.
# NOTE(review): the recorded output of this cell was 255 — confirm that
# {NAME}.ipynb exists in the working directory.
returncode = subprocess.call(cmd, shell=True)
if returncode != 0:
    print(f"WARNING: `{cmd}` exited with status {returncode}")

# Keep the exit status as the cell's displayed value.
returncode

255