From 29f527a78ed509328e68d0a0fa8ba6eea57b7126 Mon Sep 17 00:00:00 2001 From: Aleksandra Galitsyna Date: Mon, 10 Oct 2022 15:54:36 -0400 Subject: [PATCH 1/5] Resolving https://github.com/open2c/bioframe/issues/124 --- bioframe/io/fileops.py | 67 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py index 0f82a9a2..d7bbb2b1 100644 --- a/bioframe/io/fileops.py +++ b/bioframe/io/fileops.py @@ -5,6 +5,9 @@ import json import io +import os +import shutil + import numpy as np import pandas as pd @@ -488,7 +491,7 @@ def read_bigbed(path, chrom, start=None, end=None, engine="auto"): return df -def to_bigwig(df, chromsizes, outpath, value_field=None): +def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): """ Save a bedGraph-like dataframe as a binary BigWig track. @@ -504,8 +507,33 @@ def to_bigwig(df, chromsizes, outpath, value_field=None): value_field : str, optional Select the column label of the data frame to generate the track. Default is to use the fourth column. + path : str, optional + Provide system path to the bedGraphToBigWig binary. """ + + if path is None: + cmd = "bedGraphToBigWig" + try: + assert shutil.which(command) is not None + except Exception as e: + raise ValueError( + "bedGraphToBigWig is not present in the current environment. " + "Install it with, for example, conda install -y -c bioconda ucsc-bedgraphtobigwig " + ) + elif path.endswith("bedGraphToBigWig"): + cmd = path + if not os.path.isfile(path) and os.access(path, os.X_OK): + raise ValueError( + f"bedGraphToBigWig is absent in the provided path: {path}. " + ) + else: + cmd = os.path.join(path, "bedGraphToBigWig") + if not os.path.isfile(path) and os.access(path, os.X_OK): + raise ValueError( + f"bedGraphToBigWig is absent in the provided path: {path}. " + ) + is_bedgraph = True for col in ["chrom", "start", "end"]: if col not in df.columns: @@ -526,9 +554,9 @@ def to_bigwig(df, chromsizes, outpath, value_field=None): bg["chrom"] = bg["chrom"].astype(str) bg = bg.sort_values(["chrom", "start", "end"]) - with tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile( - "wt", suffix=".chrom.sizes" - ) as cs: + with open(outpath+'.bg', 'w') as f, open(outpath+'.cs', 'w') as cs: #tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile( + # "wt", suffix=".chrom.sizes" + #) as cs: chromsizes.to_csv(cs, sep="\t", header=False) cs.flush() @@ -538,14 +566,14 @@ def to_bigwig(df, chromsizes, outpath, value_field=None): ) p = subprocess.run( - ["bedGraphToBigWig", f.name, cs.name, outpath], + [cmd, f.name, cs.name, outpath], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) return p -def to_bigbed(df, chromsizes, outpath, schema="bed6"): +def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None): """ Save a bedGraph-like dataframe as a binary BigWig track. @@ -561,8 +589,33 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6"): value_field : str, optional Select the column label of the data frame to generate the track. Default is to use the fourth column. + path : str, optional + Provide system path to the bedGraphToBigWig binary. """ + + if path is None: + cmd = "bedToBigBed" + try: + assert shutil.which(command) is not None + except Exception as e: + raise ValueError( + "bedToBigBed is not present in the current environment. " + "Install it with, for example, conda install -y -c bioconda ucsc-bedtobigbed " + ) + elif path.endswith("bedToBigBed"): + cmd = path + if not os.path.isfile(path) and os.access(path, os.X_OK): + raise ValueError( + f"bedToBigBed is absent in the provided path: {path}. " + ) + else: + cmd = os.path.join(path, "bedGraphToBigWig") + if not os.path.isfile(path) and os.access(path, os.X_OK): + raise ValueError( + f"bedToBigBed is absent in the provided path: {path}. " + ) + is_bed6 = True for col in ["chrom", "start", "end", "name", "score", "strand"]: if col not in df.columns: @@ -590,7 +643,7 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6"): ) p = subprocess.run( - ["bedToBigBed", "-type={}".format(schema), f.name, cs.name, outpath], + [cmd, "-type={}".format(schema), f.name, cs.name, outpath], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) From a34c47e25cbc838b909db0444b0d298dbdf92a79 Mon Sep 17 00:00:00 2001 From: Aleksandra Galitsyna Date: Mon, 10 Oct 2022 16:04:38 -0400 Subject: [PATCH 2/5] path small fixes. code tested with and without path param --- bioframe/io/fileops.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py index d7bbb2b1..e91a582f 100644 --- a/bioframe/io/fileops.py +++ b/bioframe/io/fileops.py @@ -519,17 +519,18 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): except Exception as e: raise ValueError( "bedGraphToBigWig is not present in the current environment. " - "Install it with, for example, conda install -y -c bioconda ucsc-bedgraphtobigwig " + "Pass it as 'path' parameter to bioframe.to_bigwig or " + "install it with, for example, conda install -y -c bioconda ucsc-bedgraphtobigwig " ) elif path.endswith("bedGraphToBigWig"): - cmd = path if not os.path.isfile(path) and os.access(path, os.X_OK): raise ValueError( f"bedGraphToBigWig is absent in the provided path: {path}. " ) + cmd = path else: cmd = os.path.join(path, "bedGraphToBigWig") - if not os.path.isfile(path) and os.access(path, os.X_OK): + if not os.path.isfile(cmd) and os.access(cmd, os.X_OK): raise ValueError( f"bedGraphToBigWig is absent in the provided path: {path}. " ) @@ -601,17 +602,18 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None): except Exception as e: raise ValueError( "bedToBigBed is not present in the current environment. " - "Install it with, for example, conda install -y -c bioconda ucsc-bedtobigbed " + "Pass it as 'path' parameter to bioframe.to_bigbed or " + "install it with, for example, conda install -y -c bioconda ucsc-bedtobigbed " ) elif path.endswith("bedToBigBed"): - cmd = path if not os.path.isfile(path) and os.access(path, os.X_OK): raise ValueError( f"bedToBigBed is absent in the provided path: {path}. " ) + cmd = path else: cmd = os.path.join(path, "bedGraphToBigWig") - if not os.path.isfile(path) and os.access(path, os.X_OK): + if not os.path.isfile(cmd) and os.access(cmd, os.X_OK): raise ValueError( f"bedToBigBed is absent in the provided path: {path}. " ) From edcc051bb0fd0efccddc9136893e801467f3d530 Mon Sep 17 00:00:00 2001 From: Aleksandra Galitsyna Date: Mon, 10 Oct 2022 16:11:15 -0400 Subject: [PATCH 3/5] small fix --- bioframe/io/fileops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py index e91a582f..05fe5b52 100644 --- a/bioframe/io/fileops.py +++ b/bioframe/io/fileops.py @@ -515,7 +515,7 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): if path is None: cmd = "bedGraphToBigWig" try: - assert shutil.which(command) is not None + assert shutil.which(cmd) is not None except Exception as e: raise ValueError( "bedGraphToBigWig is not present in the current environment. " @@ -598,7 +598,7 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None): if path is None: cmd = "bedToBigBed" try: - assert shutil.which(command) is not None + assert shutil.which(cmd) is not None except Exception as e: raise ValueError( "bedToBigBed is not present in the current environment. " From 5975a02bb6c906b5338d79476fd7235229cac3d0 Mon Sep 17 00:00:00 2001 From: Aleksandra Galitsyna Date: Mon, 10 Oct 2022 17:22:10 -0400 Subject: [PATCH 4/5] fixes of names / more explicit message --- bioframe/io/fileops.py | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py index 05fe5b52..5c6c6f9b 100644 --- a/bioframe/io/fileops.py +++ b/bioframe/io/fileops.py @@ -491,7 +491,7 @@ def read_bigbed(path, chrom, start=None, end=None, engine="auto"): return df -def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): +def to_bigwig(df, chromsizes, outpath, value_field=None, path_to_binary=None): """ Save a bedGraph-like dataframe as a binary BigWig track. @@ -507,32 +507,32 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): value_field : str, optional Select the column label of the data frame to generate the track. Default is to use the fourth column. - path : str, optional + path_to_binary : str, optional Provide system path to the bedGraphToBigWig binary. """ - if path is None: + if path_to_binary is None: cmd = "bedGraphToBigWig" try: assert shutil.which(cmd) is not None except Exception as e: raise ValueError( "bedGraphToBigWig is not present in the current environment. " - "Pass it as 'path' parameter to bioframe.to_bigwig or " + "Pass it as 'path_to_binary' parameter to bioframe.to_bigwig or " "install it with, for example, conda install -y -c bioconda ucsc-bedgraphtobigwig " ) - elif path.endswith("bedGraphToBigWig"): - if not os.path.isfile(path) and os.access(path, os.X_OK): + elif path_to_binary.endswith("bedGraphToBigWig"): + if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK): raise ValueError( - f"bedGraphToBigWig is absent in the provided path: {path}. " + f"bedGraphToBigWig is absent in the provided path or cannot be executed: {path}. " ) - cmd = path + cmd = path_to_binary else: - cmd = os.path.join(path, "bedGraphToBigWig") + cmd = os.path.join(path_to_binary, "bedGraphToBigWig") if not os.path.isfile(cmd) and os.access(cmd, os.X_OK): raise ValueError( - f"bedGraphToBigWig is absent in the provided path: {path}. " + f"bedGraphToBigWig is absent in the provided path or cannot be executed: {path_to_binary}. " ) is_bedgraph = True @@ -555,9 +555,9 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): bg["chrom"] = bg["chrom"].astype(str) bg = bg.sort_values(["chrom", "start", "end"]) - with open(outpath+'.bg', 'w') as f, open(outpath+'.cs', 'w') as cs: #tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile( - # "wt", suffix=".chrom.sizes" - #) as cs: + with tempfile.NamedTemporaryFile(suffix=".bg") as f, tempfile.NamedTemporaryFile( + "wt", suffix=".chrom.sizes" + ) as cs: chromsizes.to_csv(cs, sep="\t", header=False) cs.flush() @@ -574,7 +574,7 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path=None): return p -def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None): +def to_bigbed(df, chromsizes, outpath, schema="bed6", path_to_binary=None): """ Save a bedGraph-like dataframe as a binary BigWig track. @@ -590,32 +590,32 @@ def to_bigbed(df, chromsizes, outpath, schema="bed6", path=None): value_field : str, optional Select the column label of the data frame to generate the track. Default is to use the fourth column. - path : str, optional + path_to_binary : str, optional Provide system path to the bedGraphToBigWig binary. """ - if path is None: + if path_to_binary is None: cmd = "bedToBigBed" try: assert shutil.which(cmd) is not None except Exception as e: raise ValueError( "bedToBigBed is not present in the current environment. " - "Pass it as 'path' parameter to bioframe.to_bigbed or " + "Pass it as 'path_to_binary' parameter to bioframe.to_bigbed or " "install it with, for example, conda install -y -c bioconda ucsc-bedtobigbed " ) - elif path.endswith("bedToBigBed"): - if not os.path.isfile(path) and os.access(path, os.X_OK): + elif path_to_binary.endswith("bedToBigBed"): + if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK): raise ValueError( - f"bedToBigBed is absent in the provided path: {path}. " + f"bedToBigBed is absent in the provided path or cannot be executed: {path_to_binary}. " ) - cmd = path + cmd = path_to_binary else: - cmd = os.path.join(path, "bedGraphToBigWig") + cmd = os.path.join(path_to_binary, "bedGraphToBigWig") if not os.path.isfile(cmd) and os.access(cmd, os.X_OK): raise ValueError( - f"bedToBigBed is absent in the provided path: {path}. " + f"bedToBigBed is absent in the provided path or cannot be executed: {path_to_binary}. " ) is_bed6 = True From 13890c068896f180432e5beac3dee9b952c94eb4 Mon Sep 17 00:00:00 2001 From: Aleksandra Galitsyna Date: Mon, 10 Oct 2022 17:27:06 -0400 Subject: [PATCH 5/5] fixes of names --- bioframe/io/fileops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioframe/io/fileops.py b/bioframe/io/fileops.py index 5c6c6f9b..0678fe7b 100644 --- a/bioframe/io/fileops.py +++ b/bioframe/io/fileops.py @@ -525,7 +525,7 @@ def to_bigwig(df, chromsizes, outpath, value_field=None, path_to_binary=None): elif path_to_binary.endswith("bedGraphToBigWig"): if not os.path.isfile(path_to_binary) and os.access(path_to_binary, os.X_OK): raise ValueError( - f"bedGraphToBigWig is absent in the provided path or cannot be executed: {path}. " + f"bedGraphToBigWig is absent in the provided path or cannot be executed: {path_to_binary}. " ) cmd = path_to_binary else: