Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subprocesses path for bigwig and bigbed outputs #125

Merged
merged 5 commits into from
Oct 10, 2022
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 62 additions & 7 deletions bioframe/io/fileops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import json
import io

import os
import shutil

import numpy as np
import pandas as pd

Expand Down Expand Up @@ -488,7 +491,7 @@ def read_bigbed(path, chrom, start=None, end=None, engine="auto"):
return df


def to_bigwig(df, chromsizes, outpath, value_field=None):
def to_bigwig(df, chromsizes, outpath, value_field=None, path=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

path_to_binary would be more explicit

"""
Save a bedGraph-like dataframe as a binary BigWig track.

Expand All @@ -504,8 +507,34 @@ def to_bigwig(df, chromsizes, outpath, value_field=None):
value_field : str, optional
Select the column label of the data frame to generate the track. Default
is to use the fourth column.
path : str, optional
Provide system path to the bedGraphToBigWig binary.

"""

if path is None:
cmd = "bedGraphToBigWig"
try:
assert shutil.which(cmd) is not None
except Exception as e:
raise ValueError(
"bedGraphToBigWig is not present in the current environment. "
"Pass it as 'path' parameter to bioframe.to_bigwig or "
"install it with, for example, conda install -y -c bioconda ucsc-bedgraphtobigwig "
)
elif path.endswith("bedGraphToBigWig"):
if not os.path.isfile(path) and os.access(path, os.X_OK):
raise ValueError(
f"bedGraphToBigWig is absent in the provided path: {path}. "
)
cmd = path
else:
cmd = os.path.join(path, "bedGraphToBigWig")
if not os.path.isfile(cmd) and os.access(cmd, os.X_OK):
raise ValueError(
f"bedGraphToBigWig is absent in the provided path: {path}. "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

absent or cannot be executed

)

is_bedgraph = True
for col in ["chrom", "start", "end"]:
if col not in df.columns:
Expand All @@ -526,9 +555,9 @@ def to_bigwig(df, chromsizes, outpath, value_field=None):
bg["chrom"] = bg["chrom"].astype(str)
bg = bg.sort_values(["chrom", "start", "end"])

with tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile(
"wt", suffix=".chrom.sizes"
) as cs:
with open(outpath+'.bg', 'w') as f, open(outpath+'.cs', 'w') as cs: #tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert?

# "wt", suffix=".chrom.sizes"
#) as cs:

chromsizes.to_csv(cs, sep="\t", header=False)
cs.flush()
Expand All @@ -538,14 +567,14 @@ def to_bigwig(df, chromsizes, outpath, value_field=None):
)

p = subprocess.run(
["bedGraphToBigWig", f.name, cs.name, outpath],
[cmd, f.name, cs.name, outpath],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
return p


def to_bigbed(df, chromsizes, outpath, schema="bed6"):
def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment about argument name

"""
Save a bedGraph-like dataframe as a binary BigWig track.

Expand All @@ -561,8 +590,34 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6"):
value_field : str, optional
Select the column label of the data frame to generate the track. Default
is to use the fourth column.
path : str, optional
Provide system path to the bedToBigBed binary.

"""

if path is None:
cmd = "bedToBigBed"
try:
assert shutil.which(cmd) is not None
except Exception as e:
raise ValueError(
"bedToBigBed is not present in the current environment. "
"Pass it as 'path' parameter to bioframe.to_bigbed or "
"install it with, for example, conda install -y -c bioconda ucsc-bedtobigbed "
)
elif path.endswith("bedToBigBed"):
if not os.path.isfile(path) and os.access(path, os.X_OK):
raise ValueError(
f"bedToBigBed is absent in the provided path: {path}. "
)
cmd = path
else:
cmd = os.path.join(path, "bedGraphToBigWig")
if not os.path.isfile(cmd) and os.access(cmd, os.X_OK):
raise ValueError(
f"bedToBigBed is absent in the provided path: {path}. "
)

is_bed6 = True
for col in ["chrom", "start", "end", "name", "score", "strand"]:
if col not in df.columns:
Expand Down Expand Up @@ -590,7 +645,7 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6"):
)

p = subprocess.run(
["bedToBigBed", "-type={}".format(schema), f.name, cs.name, outpath],
[cmd, "-type={}".format(schema), f.name, cs.name, outpath],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
Expand Down