Skip to content

Commit

Permalink
Trying to increase speed for UK Biobank
Browse files (browse the repository at this point in the history)
  • Loading branch information
lemieuxl committed Jul 13, 2017
1 parent 0aa6014 commit fa22edb
Showing 1 changed file with 27 additions and 2 deletions.
29 changes: 27 additions & 2 deletions pybgen/pybgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@

from six.moves import range

try:
from struct import iter_unpack
HAS_ITER_UNPACK = True
except ImportError:
HAS_ITER_UNPACK = False

try:
import zstd
HAS_ZSTD = True
Expand Down Expand Up @@ -456,8 +462,27 @@ def _get_curr_variant_dosage(self):

# Reading the probabilities (don't forget we allow only for diploid
# values)
# TODO: Check that len(data) * 8 / b / 2 = nb_samples
probs = _pack_bits(data, b) / (2**b - 1)
probs = None
if HAS_ITER_UNPACK and b == 8:
probs = np.fromiter(
(_[0] for _ in iter_unpack("<B", data)),
dtype=np.uint,
) / (2**b - 1)

elif HAS_ITER_UNPACK and b == 16:
probs = np.fromiter(
(_[0] for _ in iter_unpack("<H", data)),
dtype=np.uint,
) / (2**b - 1)

elif HAS_ITER_UNPACK and b == 32:
probs = np.fromiter(
(_[0] for _ in iter_unpack("<L", data)),
dtype=np.uint,
) / (2**b - 1)

else:
probs = _pack_bits(data, b) / (2**b - 1)
probs.shape = (self._nb_samples, 2)

# Computing the dosage
Expand Down

0 comments on commit fa22edb

Please sign in to comment.