Skip to content

Commit

Permalink
Merge pull request #1 from mcgml/allele_lookup
Browse files Browse the repository at this point in the history
added function to lookup specific variant using SQL
  • Loading branch information
lemieuxl committed Apr 30, 2018
2 parents 09875f8 + 99e6765 commit cb33190
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
26 changes: 26 additions & 0 deletions pybgen/pybgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,32 @@ def iter_variants_in_region(self, chrom, start, end):
self._bgen.seek(seek_pos)
yield self._read_current_variant()

def get_specific_variant(self, chrom, pos, ref, alt):
"""Get specific variant with allele lookup
Args:
chrom (str): The name of the chromosome.
pos (int): The starting position of the region.
ref (str): The reference allele.
alt (str): The alternative allele.
"""
# Getting the region from the index file
self._bgen_index.execute(
"SELECT file_start_position "
"FROM Variant "
"WHERE chromosome = ? AND position = ? AND allele1 = ? AND allele2 = ?",
(chrom, pos, ref, alt),
)

# Fetching all the seek positions
seek_positions = [_[0] for _ in self._bgen_index.fetchall()]

# Fetching seek positions, we return the variant
for seek_pos in seek_positions:
self._bgen.seek(seek_pos)
yield self._read_current_variant()

def iter_variant_info(self):
"""Iterate over marker information."""
self._bgen_index.execute(
Expand Down
28 changes: 28 additions & 0 deletions pybgen/tests/test_pybgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,34 @@ def test_iter_variants_in_region(self):
expected.add(name)
self.assertEqual(seen_variants, expected)

def test_get_specific_variant(self):
"""Test for specific variant lookup."""
seen_variants = set()
iterator = self.bgen.get_specific_variant("01", 67000, "A", "G")
for variant, dosage in iterator:
# The name of the variant
name = variant.name
seen_variants.add(name)

# Comparing the variant
self._compare_variant(
self.truths["variants"][name]["variant"], variant,
)

# Comparing the dosage
np.testing.assert_array_almost_equal(
self.truths["variants"][name]["data"], dosage,
)

# Checking if we checked all variants
expected = set()
for name in self.truths["variant_set"]:
variant = self.truths["variants"][name]["variant"]
if variant.chrom == "01":
if variant.pos == 67000:
expected.add(name)
self.assertEqual(seen_variants, expected)

def test_iter_seeks(self):
"""Tests the _iter_seeks function."""
# Fetching random seeks from the index
Expand Down

0 comments on commit cb33190

Please sign in to comment.