From 93c16c81e5e017b8ea129af60344211adb64eaa2 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Tue, 15 Dec 2015 17:22:02 -0800 Subject: [PATCH 01/11] adding prototype code for computing jaccard index --- src/pyokit/scripts/GenomicIntJaccard.py | 141 ++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/pyokit/scripts/GenomicIntJaccard.py diff --git a/src/pyokit/scripts/GenomicIntJaccard.py b/src/pyokit/scripts/GenomicIntJaccard.py new file mode 100644 index 0000000..42f81c1 --- /dev/null +++ b/src/pyokit/scripts/GenomicIntJaccard.py @@ -0,0 +1,141 @@ +#!/usr/bin/python + +""" + Date of Creation: 3rd Apr 2010 + Description: Iterators for processing BED format streams + + Copyright (C) 2010-2014 + Philip J. Uren + + Authors: Philip J. Uren + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +# python imports +import os +import sys +import unittest + +# pyokit imports +from pyokit.interface.cli import CLI, Option +from pyokit.io.bedIterators import BEDIterator +from pyokit.datastruct.genomicInterval import regionsIntersection +from pyokit.datascruct.genomicInterval import collapseRegions + + +############################################################################### +# HELPER FUNCTIONS # +############################################################################### +def count_toto_region_size(s): + """ + sum the size of regions in s; no check for whether they overlap is made. + """ + tot = 0 + for r in s: + tot += len(r) + + +############################################################################### +# USER INTERFACE # +############################################################################### + +def getUI(args): + """ + build and return a UI object for this script. + + :param args: raw arguments to parse + """ + programName = os.path.basename(sys.argv[0]) + longDescription = "Compute the Jaccard index for two sets of genomic " +\ + "intervals." + shortDescription = longDescription + + ui = CLI(programName, shortDescription, longDescription) + ui.minArgs = 2 + ui.maxArgs = 2 + ui.addOption(Option(short="v", long="verbose", + description="output additional messages to stderr " + + "about run", required=False)) + ui.addOption(Option(short="s", long="stranded", + description="treat regions on separate strands as " + + "disjoint, even if they overlap", + required=False)) + ui.addOption(Option(short="h", long="help", + description="show this help message ", special=True)) + ui.addOption(Option(short="u", long="test", + description="run unit tests ", special=True)) + + ui.parseCommandLine(args) + return ui + + +################################################################################ +# COMMAND LINE PROCESSING AND DISPATCH # +################################################################################ + +def main(args): + """ + main entry point for the GenomicIntJaccard script. + + :param args: the arguments for this script, as a list of string. Should + already have had things like the script name stripped. That + is, if there are no args provided, this should be an empty + list. + """ + # get options and arguments + ui = getUI(args) + + if ui.optionIsSet("test"): + # just run unit tests + unittest.main(argv=[sys.argv[0]]) + elif ui.optionIsSet("help"): + # just show help + ui.usage() + else: + verbose = ui.optionIsSet("verbose") + stranded = ui.optionIsSet("stranded") + + if stranded: + sys.stderr.write("Sorry, stranded mode hasn't been implemented yet.") + sys.exit() + + # we required two input files, so we know these will be present... + regions_1 = [e for e in BEDIterator(ui.getArgument(0), verbose=verbose)] + regions_2 = [e for e in BEDIterator(ui.getArgument(1), verbose=verbose)] + + intersection = regionsIntersection(regions_1, regions_2) + union = collapseRegions(regions_1 + regions_1) + + size_intersection = count_toto_region_size(intersection) + size_union = count_toto_region_size(union) + + jac = size_intersection / float(size_union) + print str(jac) + + +############################################################################### +# UNIT TESTS FOR THIS MODULE # +############################################################################### + +class TestGenomicIntJaccard(unittest.TestCase): + pass + + +############################################################################### +# ENTRY POINT WHEN RUN AS A STAND-ALONE MODULE # +############################################################################### + +if __name__ == "__main__": + unittest.main(argv=[sys.argv[0]]) From fc908bc2c70e76fa8737d5bc21bb1d121121680e Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Tue, 15 Dec 2015 17:25:11 -0800 Subject: [PATCH 02/11] fixed mistake in header for Jaccard idnex script --- src/pyokit/scripts/GenomicIntJaccard.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyokit/scripts/GenomicIntJaccard.py b/src/pyokit/scripts/GenomicIntJaccard.py index 42f81c1..b115d6c 100644 --- a/src/pyokit/scripts/GenomicIntJaccard.py +++ b/src/pyokit/scripts/GenomicIntJaccard.py @@ -1,8 +1,8 @@ #!/usr/bin/python """ - Date of Creation: 3rd Apr 2010 - Description: Iterators for processing BED format streams + Date of Creation: Dec 2015 + Description: Compute the Jaccard index for regions in two files Copyright (C) 2010-2014 Philip J. Uren From 3526ed864c4efc3a608cbc4c81a8563fa99a4223 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Tue, 15 Dec 2015 17:25:46 -0800 Subject: [PATCH 03/11] adding stubs for the genomic interval intersection and union scripts --- src/pyokit/scripts/GenomicIntIntersection.py | 25 ++++++++++++++++++++ src/pyokit/scripts/GenomicIntUnion.py | 25 ++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 src/pyokit/scripts/GenomicIntIntersection.py create mode 100644 src/pyokit/scripts/GenomicIntUnion.py diff --git a/src/pyokit/scripts/GenomicIntIntersection.py b/src/pyokit/scripts/GenomicIntIntersection.py new file mode 100644 index 0000000..932f23b --- /dev/null +++ b/src/pyokit/scripts/GenomicIntIntersection.py @@ -0,0 +1,25 @@ +#!/usr/bin/python + +""" + Date of Creation: Dec 2015 + Description: Get the intersection of genomic regions in one or more + files + + Copyright (C) 2010-2014 + Philip J. Uren + + Authors: Philip J. Uren + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" diff --git a/src/pyokit/scripts/GenomicIntUnion.py b/src/pyokit/scripts/GenomicIntUnion.py new file mode 100644 index 0000000..dfa747f --- /dev/null +++ b/src/pyokit/scripts/GenomicIntUnion.py @@ -0,0 +1,25 @@ +#!/usr/bin/python + +""" + Date of Creation: Dec 2015 + Description: Get the union of genomic regions in one or more + files + + Copyright (C) 2010-2014 + Philip J. Uren + + Authors: Philip J. Uren + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" From f2674b6dd1a933ffcd0b1f6295aa2a2c21da315b Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Wed, 16 Dec 2015 13:52:11 -0800 Subject: [PATCH 04/11] adding jaccard index function to genomic interval module --- src/pyokit/datastruct/genomicInterval.py | 32 +++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/pyokit/datastruct/genomicInterval.py b/src/pyokit/datastruct/genomicInterval.py index ed0e084..0354af9 100755 --- a/src/pyokit/datastruct/genomicInterval.py +++ b/src/pyokit/datastruct/genomicInterval.py @@ -54,6 +54,36 @@ class InvalidStartIndexError(GenomicIntervalError): # FUNCTIONS FOR MANIPULATING COLLECTIONS OF GENOMIC INTERVALS # ############################################################################### +def jaccardIndex(s1, s2, stranded=False): + """ + Compute the Jaccard index for two collections of genomic intervals + + :param s1: the first set of genomic intervals + :param s2: the second set of genomic intervals + :param stranded: if True, treat regions on different strands as not + intersecting each other, even if they occupy the same + genomic region. + :return: Jaccard index + """ + + def count(s): + """ sum the size of regions in s. """ + tot = 0 + for r in s: + tot += len(r) + return tot + + if stranded: + raise GenomicIntervalError("Sorry, stranded mode for computing Jaccard " + + "index hasn't been implemented yet.") + + s1 = collapseRegions(s1) + s2 = collapseRegions(s2) + intersection = regionsIntersection(s1, s2) + c_i = count(intersection) + return c_i / float(count(s1) + count(s2) - c_i) + + def intervalTreesFromList(inElements, verbose=False, openEnded=False): """ build a dictionary, indexed by chrom name, of interval trees for each chrom. @@ -197,7 +227,7 @@ def get_first_matching_index(s, proc_strands): return res -def regionsIntersection(s1, s2): +def regionsIntersection(s1, s2, collapse=True): """ given two lists of genomic regions with chromosome, start and end coordinates, return a new list of regions which is the intersection of those From 3abf993cb05a36191f7f97a3e23b4891ab43cdf8 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Wed, 16 Dec 2015 13:52:36 -0800 Subject: [PATCH 05/11] refactored jaccard index script a bit and added a basic unit test --- src/pyokit/scripts/GenomicIntJaccard.py | 52 ++++++++++++++----------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/pyokit/scripts/GenomicIntJaccard.py b/src/pyokit/scripts/GenomicIntJaccard.py index b115d6c..556e723 100644 --- a/src/pyokit/scripts/GenomicIntJaccard.py +++ b/src/pyokit/scripts/GenomicIntJaccard.py @@ -27,24 +27,16 @@ import os import sys import unittest +import StringIO + +# for unit testing +import mock # pyokit imports from pyokit.interface.cli import CLI, Option from pyokit.io.bedIterators import BEDIterator -from pyokit.datastruct.genomicInterval import regionsIntersection -from pyokit.datascruct.genomicInterval import collapseRegions - - -############################################################################### -# HELPER FUNCTIONS # -############################################################################### -def count_toto_region_size(s): - """ - sum the size of regions in s; no check for whether they overlap is made. - """ - tot = 0 - for r in s: - tot += len(r) +from pyokit.datastruct.genomicInterval import jaccardIndex +from pyokit.util.testing import build_mock_open_side_effect ############################################################################### @@ -115,14 +107,7 @@ def main(args): regions_1 = [e for e in BEDIterator(ui.getArgument(0), verbose=verbose)] regions_2 = [e for e in BEDIterator(ui.getArgument(1), verbose=verbose)] - intersection = regionsIntersection(regions_1, regions_2) - union = collapseRegions(regions_1 + regions_1) - - size_intersection = count_toto_region_size(intersection) - size_union = count_toto_region_size(union) - - jac = size_intersection / float(size_union) - print str(jac) + print jaccardIndex(regions_1, regions_2) ############################################################################### @@ -130,7 +115,28 @@ def main(args): ############################################################################### class TestGenomicIntJaccard(unittest.TestCase): - pass + + def setUp(self): + f1_conts = "\n".join(["\t".join(["chr1", "10", "20", "F1_R1", "0", "+"]), + "\t".join(["chr1", "70", "80", "F2_R2", "0", "+"]), + "\t".join(["chr2", "05", "10", "F2_R2", "0", "+"]), + "\t".join(["chr2", "70", "75", "F2_R2", "0", "+"]), + "\t".join(["chr2", "90", "95", "F2_R2", "0", "+"])]) + f2_conts = "\n".join(["\t".join(["chr1", "07", "12", "F1_R1", "0", "+"]), + "\t".join(["chr1", "67", "70", "F2_R2", "0", "+"]), + "\t".join(["chr1", "75", "85", "F2_R2", "0", "+"]), + "\t".join(["chr2", "20", "30", "F2_R2", "0", "+"]), + "\t".join(["chr2", "73", "92", "F2_R2", "0", "+"])]) + string_d = {"file1.bed": f1_conts, + "file2.bed": f2_conts} + self.mock_open_side_effect = build_mock_open_side_effect(string_d, {}) + + @mock.patch('__builtin__.open') + @mock.patch('sys.stdout', new_callable=StringIO.StringIO) + def test_simple(self, mock_stdout, mock_open): + mock_open.side_effect = self.mock_open_side_effect + main(["file1.bed", "file2.bed"]) + self.assertAlmostEqual(float(mock_stdout.getvalue()), 0.1549296) ############################################################################### From c3f9aee2a9c2173a363d9cb0f86a46bad2982254 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Wed, 16 Dec 2015 13:56:25 -0800 Subject: [PATCH 06/11] hooking up new script unit tests to run automatically --- src/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test.py b/src/test.py index fdc5380..881a739 100755 --- a/src/test.py +++ b/src/test.py @@ -90,9 +90,11 @@ from pyokit.scripts.convertJunctionReads import ConvertJunctionsUnitTests from pyokit.scripts.regionCollapse import TestCollapseRegions from pyokit.scripts.overlapProfile import TestOverlapProfile +from pyokit.scripts.GenomicIntJaccard import TestGenomicIntJaccard from pyokit.scripts.pyokitMain import PyokitSmokeTests + if __name__ == "__main__": head = " ------------------------ DATA STRUCTURES --------------------- \n" sys.stderr.write(head) @@ -147,6 +149,7 @@ sys.stderr.write(" " + str(ConvertJunctionsUnitTests) + "\n") sys.stderr.write(" " + str(TestConservationProfileIndvFiles) + "\n") sys.stderr.write(" " + str(TestOverlapProfile) + "\n") + sys.stderr.write(" " + str(TestGenomicIntJaccard) + "\n") sys.stderr.write(" " + str(PyokitSmokeTests) + "\n\n") sys.stderr.write("\n\n RUNNING TESTS \n\n") From 98ec9f7b4264a83722ec1dee4985c4f6799f05b9 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Wed, 16 Dec 2015 14:46:13 -0800 Subject: [PATCH 07/11] fleshed out basic logic for script that computes intersection of genomic regions --- src/pyokit/scripts/GenomicIntIntersection.py | 118 ++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/src/pyokit/scripts/GenomicIntIntersection.py b/src/pyokit/scripts/GenomicIntIntersection.py index 932f23b..08071ca 100644 --- a/src/pyokit/scripts/GenomicIntIntersection.py +++ b/src/pyokit/scripts/GenomicIntIntersection.py @@ -5,7 +5,7 @@ Description: Get the intersection of genomic regions in one or more files - Copyright (C) 2010-2014 + Copyright (C) 2010-2015 Philip J. Uren Authors: Philip J. Uren @@ -23,3 +23,119 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . """ + +# standard python imports +import os +import sys +import unittest +# import StringIO + +# for unit testing +# import mock + +# pyokit imports +from pyokit.interface.cli import CLI, Option +from pyokit.datastruct.genomicInterval import regionsIntersection + + +################################################################################ +# USER INTERFACE # +################################################################################ + +def getUI(args): + """ + build and return a UI object for this script. + + :param args: raw arguments to parse + """ + programName = os.path.basename(sys.argv[0]) + longDescription = "takes a file with a list of p-values and applies " +\ + "Benjamini and Hochberg FDR to convert to q-values " + shortDescription = "takes a file with a list of p-values and applies " +\ + "Benjamini and Hochberg FDR to convert to q-values " + + ui = CLI(programName, shortDescription, longDescription) + ui.minArgs = 2 + ui.maxArgs = 2 + ui.addOption(Option(short="o", long="output", argName="filename", + description="output to given file, else stdout", + required=False, type=str)) + ui.addOption(Option(short="s", long="stranded", + description="treat regions on separate strands as " + + "disjoint, even if they overlap", + required=False)) + ui.addOption(Option(short="v", long="verbose", + description="output additional messages to stderr " + + "about run", required=False)) + ui.addOption(Option(short="h", long="help", + description="show this help message ", special=True)) + ui.addOption(Option(short="u", long="test", + description="run unit tests ", special=True)) + + ui.parseCommandLine(args) + return ui + + +################################################################################ +# COMMAND LINE PROCESSING AND DISPATCH # +################################################################################ + +def main(args): + """ + main entry point for the GenomicIntIntersection script. + + :param args: the arguments for this script, as a list of string. Should + already have had things like the script name stripped. That + is, if there are no args provided, this should be an empty + list. + """ + # get options and arguments + ui = getUI(args) + + if ui.optionIsSet("test"): + # just run unit tests + unittest.main(argv=[sys.argv[0]]) + elif ui.optionIsSet("help"): + # just show help + ui.usage() + else: + verbose = ui.optionIsSet("verbose") + + # stranded? + stranded = ui.optionIsSet("stranded") + if stranded: + sys.stderr.write("Sorry, stranded mode hasn't been implemented yet.") + sys.exit() + + # get output handle + out_fh = sys.stdout + if ui.optionIsSet("output"): + out_fh = open(ui.getValue("output"), "w") + + # get input file-handles -- we know we'll get exactly two, since we + # specified it in the UI definition + regions_1 = [x for x in open(ui.getArgument(0))] + regions_2 = [x for x in open(ui.getArgument(1))] + + for r in regionsIntersection(regions_1, regions_2): + out_fh.write(str(r) + "\n") + + +############################################################################### +# UNIT TESTS # +############################################################################### + +class TestGenomicIntIntersection(unittest.TestCase): + + """Unit tests for the GenomicIntIntersection program/script.""" + + def setUp(self): + pass + + +############################################################################### +# ENTRY POINT WHEN RUN AS A STAND-ALONE MODULE # +############################################################################### + +if __name__ == "__main__": + main(sys.argv[1:]) From 074e0ea1ce102275763a49487ad4219fb7e6b952 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Thu, 17 Dec 2015 10:56:51 -0800 Subject: [PATCH 08/11] removing stub for getting union of genomic regions, since this functionality already exists --- src/pyokit/scripts/GenomicIntUnion.py | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 src/pyokit/scripts/GenomicIntUnion.py diff --git a/src/pyokit/scripts/GenomicIntUnion.py b/src/pyokit/scripts/GenomicIntUnion.py deleted file mode 100644 index dfa747f..0000000 --- a/src/pyokit/scripts/GenomicIntUnion.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/python - -""" - Date of Creation: Dec 2015 - Description: Get the union of genomic regions in one or more - files - - Copyright (C) 2010-2014 - Philip J. Uren - - Authors: Philip J. Uren - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -""" From 600f2244bd4259e328b8a3ac81cf1a93b4d7869f Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Thu, 17 Dec 2015 10:59:55 -0800 Subject: [PATCH 09/11] adjusted names of a couple of scripts --- .../scripts/{GenomicIntJaccard.py => genomicIntJaccard.py} | 0 .../{GenomicIntIntersection.py => genomicIntersection.py} | 0 src/test.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename src/pyokit/scripts/{GenomicIntJaccard.py => genomicIntJaccard.py} (100%) rename src/pyokit/scripts/{GenomicIntIntersection.py => genomicIntersection.py} (100%) diff --git a/src/pyokit/scripts/GenomicIntJaccard.py b/src/pyokit/scripts/genomicIntJaccard.py similarity index 100% rename from src/pyokit/scripts/GenomicIntJaccard.py rename to src/pyokit/scripts/genomicIntJaccard.py diff --git a/src/pyokit/scripts/GenomicIntIntersection.py b/src/pyokit/scripts/genomicIntersection.py similarity index 100% rename from src/pyokit/scripts/GenomicIntIntersection.py rename to src/pyokit/scripts/genomicIntersection.py diff --git a/src/test.py b/src/test.py index 881a739..ef3df43 100755 --- a/src/test.py +++ b/src/test.py @@ -90,7 +90,7 @@ from pyokit.scripts.convertJunctionReads import ConvertJunctionsUnitTests from pyokit.scripts.regionCollapse import TestCollapseRegions from pyokit.scripts.overlapProfile import TestOverlapProfile -from pyokit.scripts.GenomicIntJaccard import TestGenomicIntJaccard +from pyokit.scripts.genomicIntJaccard import TestGenomicIntJaccard from pyokit.scripts.pyokitMain import PyokitSmokeTests From 1f8e042db331b8555720fe71f2ef8ddf50cde2d0 Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Sun, 15 Nov 2015 17:35:10 -0800 Subject: [PATCH 10/11] added ability for mocked file open to attach names to created StringIO objects, and added default empty dict for streams param --- src/pyokit/util/testing.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/pyokit/util/testing.py b/src/pyokit/util/testing.py index bcd99d9..90b168b 100644 --- a/src/pyokit/util/testing.py +++ b/src/pyokit/util/testing.py @@ -27,20 +27,30 @@ import StringIO -def build_mock_open_side_effect(string_d, stream_d): +def build_mock_open_side_effect(string_d, stream_d=None, add_names=True): """ Build a mock open side effect using a dictionary of content for the files. - :param string_d: keys are file names, values are string file contents - :param stream_d: keys are file names, values are stream of contents + :param string_d: keys are file names, values are string file contents + :param stream_d: keys are file names, values are stream of contents + :param add_names: if True, each StringIO object will have a .name attribute + added to it. """ + if stream_d is None: + stream_d = {} assert(len(set(string_d.keys()).intersection(set(stream_d.keys()))) == 0) def mock_open_side_effect(*args, **kwargs): if args[0] in string_d: - return StringIO.StringIO(string_d[args[0]]) + r = StringIO.StringIO(string_d[args[0]]) + if add_names: + r.name = args[0] + return r elif args[0] in stream_d: - return stream_d[args[0]] + r = stream_d[args[0]] + if add_names: + r.name = args[0] + return r else: raise IOError("No such file: " + args[0] + ". Known these strings " + ", ".join(string_d.keys()) + " and these streams " + From e9698bdcb59942115000ceaac8c3337ad22796be Mon Sep 17 00:00:00 2001 From: Philip Uren Date: Thu, 17 Dec 2015 11:31:09 -0800 Subject: [PATCH 11/11] added unit tests for getting intersection of regions --- src/pyokit/scripts/genomicIntersection.py | 34 +++++++++++++++++++---- src/test.py | 3 +- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/pyokit/scripts/genomicIntersection.py b/src/pyokit/scripts/genomicIntersection.py index 08071ca..c0609b0 100644 --- a/src/pyokit/scripts/genomicIntersection.py +++ b/src/pyokit/scripts/genomicIntersection.py @@ -28,13 +28,15 @@ import os import sys import unittest -# import StringIO +import StringIO # for unit testing -# import mock +import mock # pyokit imports from pyokit.interface.cli import CLI, Option +from pyokit.io.bedIterators import BEDIterator +from pyokit.util.testing import build_mock_open_side_effect from pyokit.datastruct.genomicInterval import regionsIntersection @@ -114,8 +116,8 @@ def main(args): # get input file-handles -- we know we'll get exactly two, since we # specified it in the UI definition - regions_1 = [x for x in open(ui.getArgument(0))] - regions_2 = [x for x in open(ui.getArgument(1))] + regions_1 = [x for x in BEDIterator(ui.getArgument(0), verbose=verbose)] + regions_2 = [x for x in BEDIterator(ui.getArgument(1), verbose=verbose)] for r in regionsIntersection(regions_1, regions_2): out_fh.write(str(r) + "\n") @@ -130,7 +132,29 @@ class TestGenomicIntIntersection(unittest.TestCase): """Unit tests for the GenomicIntIntersection program/script.""" def setUp(self): - pass + f1_conts = "\n".join(["\t".join(["chr1", "10", "20", "F1_R1", "0", "+"]), + "\t".join(["chr1", "70", "80", "F2_R2", "0", "+"]), + "\t".join(["chr2", "05", "10", "F2_R2", "0", "+"]), + "\t".join(["chr2", "70", "75", "F2_R2", "0", "+"]), + "\t".join(["chr2", "90", "95", "F2_R2", "0", "+"])]) + f2_conts = "\n".join(["\t".join(["chr1", "07", "12", "F1_R1", "0", "+"]), + "\t".join(["chr1", "67", "70", "F2_R2", "0", "+"]), + "\t".join(["chr1", "75", "85", "F2_R2", "0", "+"]), + "\t".join(["chr2", "20", "30", "F2_R2", "0", "+"]), + "\t".join(["chr2", "73", "92", "F2_R2", "0", "+"])]) + self.str_d = {"file1.bed": f1_conts, "file2.bed": f2_conts} + + @mock.patch('__builtin__.open') + def testTwoFiles(self, mock_open): + outfh = StringIO.StringIO() + stream_d = {"out.bed": outfh} + mock_open.side_effect = build_mock_open_side_effect(self.str_d, stream_d) + main(["-o", "out.bed", "file1.bed", "file2.bed"]) + expect = "\n".join(["\t".join(["chr1", "10", "12", "X", "0", "+"]), + "\t".join(["chr1", "75", "80", "X", "0", "+"]), + "\t".join(["chr2", "73", "75", "X", "0", "+"]), + "\t".join(["chr2", "90", "92", "X", "0", "+"])]) + "\n" + self.assertEqual(outfh.getvalue(), expect) ############################################################################### diff --git a/src/test.py b/src/test.py index ef3df43..6062f30 100755 --- a/src/test.py +++ b/src/test.py @@ -91,10 +91,10 @@ from pyokit.scripts.regionCollapse import TestCollapseRegions from pyokit.scripts.overlapProfile import TestOverlapProfile from pyokit.scripts.genomicIntJaccard import TestGenomicIntJaccard +from pyokit.scripts.genomicIntersection import TestGenomicIntIntersection from pyokit.scripts.pyokitMain import PyokitSmokeTests - if __name__ == "__main__": head = " ------------------------ DATA STRUCTURES --------------------- \n" sys.stderr.write(head) @@ -150,6 +150,7 @@ sys.stderr.write(" " + str(TestConservationProfileIndvFiles) + "\n") sys.stderr.write(" " + str(TestOverlapProfile) + "\n") sys.stderr.write(" " + str(TestGenomicIntJaccard) + "\n") + sys.stderr.write(" " + str(TestGenomicIntIntersection) + "\n") sys.stderr.write(" " + str(PyokitSmokeTests) + "\n\n") sys.stderr.write("\n\n RUNNING TESTS \n\n")