In [137]:
import subprocess as sp
import os
# --- Run configuration --------------------------------------------------
# Identifiers that select which experiment / peak file this notebook processes.
EXPERIMENT = "ENCSR000EEC"
PEAK = "ENCFF005YUC"
# Passed verbatim as --window-size; kept as a string because the command
# line is assembled by joining strings.
INPUT_LENGTH = "1000"

# Every directory variable below ends with "/" so that file names can be
# appended by plain concatenation. Derived paths therefore must NOT add a
# leading "/" of their own (doing so produced "//" in every derived path).
chrom_size_file = "/users/zahoor/reference/GRCh38_EBV.chrom.sizes"
peak_path = "/mnt/lab_data2/vir/tf_chr_atlas/data/idr_peaks/"
base_path = "/mnt/lab_data2/vir/tf_chr_atlas/02-24-2021/"
predictions_path = base_path + "predictions/" + EXPERIMENT + "/"
# Shell command substitution: only expanded because the command that uses it
# is executed through a shell.
chroms = "$(cat ~/reference/hg38_chroms.txt)"
h5tobw_path = "/mnt/lab_data2/vir/tf_chr_atlas/02-24-2021/3M/importance_hdf5_to_bigwig.py"
shap_path = base_path + "shap/" + EXPERIMENT + "/"
data_path = base_path + "bigWigs/"
reports_path = "/mnt/lab_data2/vir/tf_chr_atlas/02-24-2021/reports/tfmodisco/notebooks/"
# File name looked up under <reports_path><EXPERIMENT>/moods/<head>/.
moods_filtered_file = "moods_filtered_collapsed_tested_fdr.bed"

# Filesystem directory the tracks are copied into, and the URL prefix used
# for the same files in the generated JSON (EXPERIMENT is appended at use).
srv_path = "/srv/www/kundaje/vir/tf_atlas/results/data/" + EXPERIMENT + "/"
www_path = "http://mitra.stanford.edu/kundaje/vir/tf_atlas/results/data/"




# Run the external `logits2profile` command once per strand. It is given the
# per-strand logits and exponentiated-counts bigWigs from predictions_path and
# asked to write an output named "prediction_<strand>" into the same directory.
for STRAND in ["plus","minus"]:
    comm = ["logits2profile"]
    comm += ["--logits-file", predictions_path+EXPERIMENT+"_split000_task0_"+STRAND+".bw"]
    comm += ["--counts-file", predictions_path+EXPERIMENT+"_split000_task0_"+STRAND+"_exponentiated_counts.bw"]
    comm += ["--output-directory", predictions_path]
    comm += ["--output-filename", "prediction_"+STRAND]
    comm += ["--peaks", peak_path+PEAK+".bed.gz"]
    comm += ["--chroms", chroms]
    comm += ["--chrom-sizes", chrom_size_file]
    comm += ["--window-size", INPUT_LENGTH]

    # shell=True is required here: `chroms` is a $(cat ...) substitution that
    # only a shell will expand into the list of chromosome names.
    proc = sp.Popen(" ".join(comm), shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    res = proc.communicate()

    # Fail loudly: later cells copy and publish the files this step writes,
    # so an unnoticed failure here would surface much later and less clearly.
    if proc.returncode != 0:
        raise RuntimeError(
            "logits2profile failed for strand %s (exit code %d):\n%s"
            % (STRAND, proc.returncode, res[1].decode("utf-8", "replace"))
        )

In [138]:
# Convert the importance scores of each head from HDF5 to bigWig by running
# the script at h5tobw_path; inputs and outputs all live in shap_path.
for HEAD in ["counts","profile"]:
    comm = ["python", h5tobw_path]
    comm += ["-h5", shap_path+HEAD+"_scores.h5"]
    comm += ["-r", shap_path+"peaks_valid_scores.bed"]
    comm += ["-c", chrom_size_file]
    comm += ["-o", shap_path+HEAD+"_scores.bw"]
    comm += ["-s", shap_path+HEAD+"_scores.stats.txt"]
    comm += ["-t", "1"]

    # Nothing in this command needs shell expansion, so pass the argument
    # list directly; that also keeps paths intact if they ever contain spaces.
    proc = sp.Popen(comm, stdout=sp.PIPE, stderr=sp.PIPE)
    res = proc.communicate()

    # Surface failures instead of silently continuing to the publishing cells.
    if proc.returncode != 0:
        raise RuntimeError(
            "importance_hdf5_to_bigwig failed for head %s (exit code %d):\n%s"
            % (HEAD, proc.returncode, res[1].decode("utf-8", "replace"))
        )

In [147]:

def _run_checked(comm):
    """Run an external command given as an argument list and raise on failure.

    Raises:
        RuntimeError: if the command exits with a non-zero status; the message
            contains the command line and whatever it wrote to stderr.
    """
    proc = sp.Popen(comm, stdout=sp.PIPE, stderr=sp.PIPE)
    # communicate() drains both pipes while waiting. Calling wait() with piped
    # stdout/stderr can block forever once the child fills a pipe buffer.
    _, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(
            "%s failed (exit code %d):\n%s"
            % (" ".join(comm), proc.returncode, err.decode("utf-8", "replace"))
        )


def sort_compress_index(file,input_path,output_file,output_path):
    """Trim a tab-separated file to its first four columns, then sort, bgzip and tabix it.

    Steps, all written under ``output_path``:
      1. ``<output_file>_filtered`` - columns labelled 0..3 of the input
         (the input is read without a header row).
      2. ``<output_file>`` - the filtered file sorted by column 1, then
         numerically by column 2.
      3. ``<output_file>.gz`` - produced by ``bgzip -f`` from the sorted file.
      4. a tabix index for the ``.gz`` file, built with the ``bed`` preset.

    Args:
        file: name of the input file inside ``input_path``.
        input_path: directory of the input file, including the trailing "/".
        output_file: base name for the outputs inside ``output_path``.
        output_path: output directory, including the trailing "/".

    Raises:
        RuntimeError: if ``sort``, ``bgzip`` or ``tabix`` exits non-zero.
    """
    import pandas as pd

    filtered_file = output_path+output_file+"_filtered"
    sorted_file = output_path+output_file

    # .loc slicing is label based and inclusive, so 0:3 keeps four columns.
    filtered_df = pd.read_csv(input_path+file,sep="\t",header=None).loc[:,0:3]
    filtered_df.to_csv(filtered_file,sep="\t",header=False,index=False)

    _run_checked(["sort", "-k1,1", "-k2,2n", "-o", sorted_file, filtered_file])
    _run_checked(["bgzip", "-f", sorted_file])
    # -f mirrors `bgzip -f` above: when the cell is re-run the archive is
    # rewritten, so the index must be rebuilt rather than left stale.
    _run_checked(["tabix", "-f", "-p", "bed", sorted_file+".gz"])

In [148]:
# Peak file first: trimmed, sorted, compressed and indexed as "peaks.bed"
# inside the predictions directory.
sort_compress_index(PEAK + ".bed.gz", peak_path, "peaks.bed", predictions_path)

# Then the filtered moods file of each head, read from that head's report
# directory and written to the predictions directory under a head-prefixed name.
for HEAD in ("counts", "profile"):
    sort_compress_index(
        moods_filtered_file,
        reports_path + EXPERIMENT + "/moods/" + HEAD + "/",
        HEAD + "_moods_filtered_collapsed_tested.sorted_bed",
        predictions_path,
    )


In [149]:
import os
# Create the served directory on first use. The mode passed to makedirs is
# still masked by the process umask, so the result may be less permissive.
if not os.path.exists(srv_path):
    os.makedirs(srv_path, mode = 0o777)

# (source, destination) pairs to publish. Sources containing "*" are glob
# patterns and are copied into the directory itself.
copy_jobs = [
    (shap_path+"counts_scores.bw", srv_path+"counts_scores.bw"),
    (shap_path+"profile_scores.bw", srv_path+"profile_scores.bw"),
    (data_path+EXPERIMENT+"_plus.bw", srv_path+"plus.bw"),
    (data_path+EXPERIMENT+"_minus.bw", srv_path+"minus.bw"),
    (predictions_path+"prediction*.bw", srv_path),
    (predictions_path+"*_moods*", srv_path),
    (predictions_path+"peaks*", srv_path),
]

# Run every copy as its own command so each exit status is visible; a single
# ";"-chained command line only reports the status of its last `cp`.
copy_failures = []
for source, destination in copy_jobs:
    comm = ["cp", source, destination]
    # The shell is needed so the glob patterns above are expanded.
    proc = sp.Popen(" ".join(comm), shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    # communicate() rather than wait(): wait() can block with piped output.
    _, err = proc.communicate()
    if proc.returncode != 0:
        copy_failures.append(
            "%s (exit code %d): %s"
            % (" ".join(comm), proc.returncode, err.decode("utf-8", "replace").strip())
        )

# All copies are attempted before reporting, so one missing file does not
# prevent the remaining tracks from being published.
if copy_failures:
    raise RuntimeError("Some files could not be copied:\n" + "\n".join(copy_failures))


0

In [150]:
import json
# Build the list of track descriptions and write it as WashU.json into the
# served directory. Key order inside every dictionary is deliberate: it is the
# order the keys appear in the written file.

# Metadata values shared by every track in this hub.
FACTOR = "MAFF"
TISSUE = "HepG2"


def _track_metadata(include_peak=False):
    """Return the metadata dict for a track, optionally with the peak identifier."""
    metadata = {
        "factor": FACTOR,
        "tissue": TISSUE,
        "experiment": EXPERIMENT,
    }
    if include_peak:
        metadata["peak"] = PEAK
    return metadata


def _track(track_type, url, name, color, include_peak=False, show_on_load=True):
    """Return one single-file track entry (bed / bigwig / dynseq).

    ``show_on_load`` is False for tracks nested inside a matplot, which carry
    no "showOnHubLoad" key of their own.
    """
    track = {"type": track_type, "url": url, "name": name}
    if show_on_load:
        # Stored as the string "true", not a boolean.
        track["showOnHubLoad"] = "true"
    track["options"] = {"color": color}
    track["metadata"] = _track_metadata(include_peak)
    return track


def _stranded_matplot(name, plus_file, plus_color, minus_file, minus_color):
    """Return a matplot entry overlaying a plus-strand and a minus-strand bigwig."""
    experiment_url = www_path+EXPERIMENT+"/"
    return {
        "type": "matplot",
        "name": name,
        "showOnHubLoad": "true",
        "tracks": [
            _track("bigwig", experiment_url+plus_file, "plus", plus_color,
                   show_on_load=False),
            _track("bigwig", experiment_url+minus_file, "minus", minus_color,
                   show_on_load=False),
        ],
        "options": {
            "aggregateMethod": "SUM"
        },
    }


#Create a JSON Object
json_obj = [
    _track("bed", www_path+EXPERIMENT+"/peaks.bed.gz",
           "peaks", "#21273d", include_peak=True),
    _stranded_matplot("observed",
                      "plus.bw", "#164af4",
                      "minus.bw", "#f43116"),
    _stranded_matplot("predicted",
                      "prediction_plus.bw", "#667dc9",
                      "prediction_minus.bw", "#fc7662"),
    _track("bed", www_path+EXPERIMENT+"/counts_moods_filtered_collapsed_tested.sorted_bed.gz",
           "count motifs", "#4a435e", include_peak=True),
    _track("bed", www_path+EXPERIMENT+"/profile_moods_filtered_collapsed_tested.sorted_bed.gz",
           "profile motifs", "#4a435e", include_peak=True),
    _track("dynseq", www_path+EXPERIMENT+"/counts_scores.bw",
           "counts importance", "#59b777"),
    _track("dynseq", www_path+EXPERIMENT+"/profile_scores.bw",
           "profile importance", "#59b777"),
    # This track is not per-experiment: it lives under "misc/" rather than
    # under the EXPERIMENT directory.
    _track("bigwig", www_path+"misc/ENCSR149XIL.merged.nodup.pval.signal.bigwig",
           "DHS pvalue signal", "#21273d", include_peak=True),
]

#Write the object to file.
with open(srv_path+'WashU.json','w') as jsonFile:
    json.dump(json_obj, jsonFile)
    
    