In [1]:
# Enable IPython's autoreload extension. Mode 1 reloads only the modules
# registered with %aimport before each cell is executed.
%reload_ext autoreload
%autoreload 1

In [2]:
import numpy as np
from time import time

In [3]:
import cProfile
import pstats

In [4]:
%aimport malariagen_data.ag3

In [5]:
%aimport malariagen_data.veff 

In [6]:
# Short alias for the veff submodule registered with %aimport above.
veff = malariagen_data.veff

In [7]:
# Ag3 accessor constructed with the "gs://vo_agam_release" bucket URL.
ag3 = malariagen_data.Ag3("gs://vo_agam_release")

In [8]:
# Open the reference genome; the repr below shows it is a zarr group.
# NOTE(review): _open_genome is underscore-prefixed (private by convention) —
# confirm whether a public accessor should be used instead.
genome = ag3._open_genome()
genome

<zarr.hierarchy.Group '/'>

In [9]:
# Names of the members of the genome group.
list(genome)

['2L', '2R', '3L', '3R', 'Mt', 'UNKN', 'X', 'Y_unplaced']

In [11]:
# First 10 elements of 3R, converted to a string via their raw bytes.
# assumes a single-byte character dtype — TODO confirm
genome["3R"][0:10].tobytes().decode()

'CCTCTACGTT'

In [11]:
%%time
# Build the annotator from the genome and the GFF3 annotation file.
# Loading the GFF is slow, but it is a one-off cost.
ann = veff.Annotator(
    genome=genome,
    gff3_path="gs://vo_agam_release/reference/genome/agamp4/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.12.gff3.gz",
)

CPU times: user 15.5 s, sys: 206 ms, total: 15.7 s
Wall time: 22.5 s


In [13]:
%%time
for effect in ann.get_effects(chrom='2L', pos=2429745, ref='A', alt='T',
                                  transcript_ids=["AGAP004707-RA"]):
       print(effect)

VariantEffect(effect='NON_SYNONYMOUS_CODING', impact='MODERATE', chrom='2L', pos=2429745, ref='A', alt='T', vlen=0, ref_start=2429745, ref_stop=2429745, gene_id='AGAP004707', gene_start=2358158, gene_stop=2431617, gene_strand='+', transcript_id='AGAP004707-RA', transcript_start=2358158, transcript_stop=2431617, transcript_strand='+', cds_id='AGAP004707-PA', cds_start=2429556, cds_stop=2429801, cds_strand='+', intron_start=None, intron_stop=None, intron_5prime_dist=None, intron_3prime_dist=None, intron_cds_5prime=None, intron_cds_3prime=None, ref_cds_start=4545, ref_cds_stop=4545, ref_intron_start=None, ref_intron_stop=None, ref_start_phase=0, ref_codon='Aat', alt_codon='Tat', codon_change='Aat/Tat', aa_pos=1516, ref_aa='N', alt_aa='Y', aa_change='N1516Y')
CPU times: user 1.37 ms, sys: 0 ns, total: 1.37 ms
Wall time: 1.33 ms


In [14]:
# Site positions, reference alleles and alternate alleles for 2L.
# Later cells call .compute() on these, so they appear to be lazy arrays.
pos, ref, alt = ag3.snp_sites('2L')

In [15]:
import allel

In [16]:
# Index within pos of the site at position 2429745.
start_idx = allel.SortedIndex(pos).locate_key(2429745)
start_idx

2150929

In [17]:
# Materialise a 100-site window starting at start_idx: positions (pp),
# reference alleles (rr) and alternate alleles (aaa), computed in that order.
pp, rr, aaa = (
    arr[start_idx:start_idx + 100].compute()
    for arr in (pos, ref, alt)
)

In [18]:
def testf(n, show=False, chrom='2L', transcript_ids=None):
    """Annotate the first ``n`` sites of the pre-loaded window.

    Walks the notebook-level arrays ``pp`` (positions), ``rr`` (reference
    alleles) and ``aaa`` (alternate alleles per site) in step and asks the
    notebook-level annotator ``ann`` for the effects of every alternate
    allele at each site. The effects generator is always fully consumed so
    that timing/profiling measures the annotation work itself.

    Parameters
    ----------
    n : number
        How many sites (from the start of the window) to annotate. Values
        less than or equal to zero annotate nothing.
    show : bool, optional
        If True, print each effect as it is produced.
    chrom : str, optional
        Chromosome passed to ``ann.get_effects``. Defaults to ``'2L'``.
    transcript_ids : list of str, optional
        Transcripts to restrict annotation to. Defaults to
        ``["AGAP004707-RA"]``.

    Returns
    -------
    None
    """
    if transcript_ids is None:
        transcript_ids = ["AGAP004707-RA"]

    # loop over sites
    for i, (p, r, aa) in enumerate(zip(pp, rr, aaa)):
        if i >= n:
            # Stop as soon as n sites are done rather than scanning the
            # rest of the window and skipping each one.
            break
        # loop over alt alleles; alleles are bytes, so decode for the annotator
        for a in aa:
            effects = ann.get_effects(chrom=chrom, pos=p, ref=r.decode(),
                                      alt=a.decode(),
                                      transcript_ids=transcript_ids)
            for effect in effects:
                if show:
                    print(effect)
    

In [20]:
%%time
# Time annotation of the first 20 sites of the window (all alt alleles).
testf(20)

CPU times: user 146 ms, sys: 3.89 ms, total: 150 ms
Wall time: 149 ms


In [33]:
%%time
# Time loading the whole ref array returned by snp_sites('2L') into memory.
x = ref.compute()

CPU times: user 830 ms, sys: 43 ms, total: 873 ms
Wall time: 1.93 s


In [34]:
# Size in bytes of the materialised array.
x.nbytes

48525747

In [23]:
# Profile 100 sites; report sorted by cumulative time (including callees).
cProfile.run('testf(100)', sort='cumtime')

         469969 function calls (465991 primitive calls) in 0.439 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.439    0.439 {built-in method builtins.exec}
        1    0.000    0.000    0.439    0.439 <string>:1(<module>)
        1    0.002    0.002    0.439    0.439 <ipython-input-18-ac85f549eef5>:1(testf)
      600    0.002    0.000    0.437    0.001 veff.py:135(get_effects)
      600    0.003    0.000    0.245    0.000 veff.py:220(_get_gene_effects)
      600    0.009    0.000    0.225    0.000 veff.py:258(_get_transcript_effects)
      600    0.003    0.000    0.212    0.000 veff.py:325(_get_within_transcript_effects)
      300    0.001    0.000    0.167    0.001 veff.py:58(find)
      300    0.001    0.000    0.167    0.001 intervals.py:216(search)
      300    0.002    0.000    0.165    0.001 intervals.py:190(_search_tree)
      300    0.003    0.000    0.121    0.000 intervaltree

In [22]:
# Profile 20 sites; report sorted by internal time (excluding callees).
cProfile.run('testf(20)', sort='time')

         118444 function calls (117664 primitive calls) in 0.131 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    22620    0.023    0.000    0.028    0.000 base.py:570(__getattr__)
    25020    0.016    0.000    0.016    0.000 interval.py:159(__eq__)
   840/60    0.010    0.000    0.034    0.001 node.py:309(search_point)
     9900    0.009    0.000    0.009    0.000 interval.py:173(__cmp__)
       60    0.007    0.000    0.010    0.000 veff.py:356(<listcomp>)
     2460    0.006    0.000    0.024    0.000 {method 'add' of 'set' objects}
    24480    0.005    0.000    0.005    0.000 {method 'index' of 'list' objects}
       60    0.003    0.000    0.003    0.000 veff.py:360(<listcomp>)
      180    0.003    0.000    0.021    0.000 {built-in method builtins.sorted}
     9900    0.003    0.000    0.012    0.000 interval.py:204(__lt__)
       60    0.003    0.000    0.010    0.000 veff.py:334(<listcomp>)
       60    0.003   

In [15]:
# Look up the feature record for a single transcript ID.
ann.get_feature("AGAP004707-RD")

('2L',
 'VectorBase',
 'mRNA',
 2358158,
 2431617,
 '.',
 '+',
 '.',
 'AGAP004707-RD',
 'AGAP004707')

In [16]:
# Child features of the gene; the output below lists its mRNA records.
ann.get_children("AGAP004707")

[('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RA',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RB',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RC',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RD',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RE',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RF',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RG',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RH',
  'AGAP004707'),
 ('2L',
  'VectorBase',
  'mRNA',
  2358158,
  2431617,
  '.',
  '+',
  '.',
  'AGAP004707-RI',


In [17]:
# Features found for the 2L interval 2358158-2358159.
ann.find("2L", 2358158, 2358159)

[('2L', 'VectorBase', 'chromosome', 1, 49364325, '.', '.', '.', '2L', None),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PA',
  'AGAP004707-RA'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PB',
  'AGAP004707-RB'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PC',
  'AGAP004707-RC'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PD',
  'AGAP004707-RD'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PE',
  'AGAP004707-RE'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PF',
  'AGAP004707-RF'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PG',
  'AGAP004707-RG'),
 ('2L',
  'VectorBase',
  'CDS',
  2358158,
  2358304,
  '.',
  '+',
  '0',
  'AGAP004707-PH',
  'AGAP004707-RH'),
 ('

In [18]:
# get_effects returns a generator, so this cell only displays the generator
# object; nothing is computed until it is iterated.
ann.get_effects("2L", 2422652, "A", "T", transcript_ids=["AGAP004707-RA"])

<generator object get_effects at 0x7f3bb40cbaf0>

In [19]:
%%time
for effect in ann.get_effects("2L", 2422652, "A", "T", transcript_ids=["AGAP004707-RD"]): print(effect)

VariantEffect(effect='NON_SYNONYMOUS_CODING', impact='MODERATE', chrom='2L', pos=2422652, ref='A', alt='T', vlen=0, ref_start=2422652, ref_stop=2422652, gene_id='AGAP004707', gene_start=2358158, gene_stop=2431617, gene_strand='+', transcript_id='AGAP004707-RD', transcript_start=2358158, transcript_stop=2431617, transcript_strand='+', cds_id='AGAP004707-PD', cds_start=2422468, cds_stop=2422655, cds_strand='+', intron_start=None, intron_stop=None, intron_5prime_dist=None, intron_3prime_dist=None, intron_cds_5prime=None, intron_cds_3prime=None, ref_cds_start=2984, ref_cds_stop=2984, ref_intron_start=None, ref_intron_stop=None, ref_start_phase=2, ref_codon='ttA', alt_codon='ttT', codon_change='ttA/ttT', aa_pos=995, ref_aa='L', alt_aa='F', aa_change='L995F')
CPU times: user 1.44 s, sys: 160 ms, total: 1.6 s
Wall time: 5.65 s


In [14]:
# Profile the single-SNP lookup. With no sort argument the report is ordered
# by standard name.
cProfile.run('for effect in ann.get_effects("2L", 2422652, "A", "T", transcript_ids=["AGAP004707-RD"]): print(effect)')

VariantEffect(effect='NON_SYNONYMOUS_CODING', impact='MODERATE', chrom='2L', pos=2422652, ref='A', alt='T', vlen=0, ref_start=2422652, ref_stop=2422652, gene_id='AGAP004707', gene_start=2358158, gene_stop=2431617, gene_strand='+', transcript_id='AGAP004707-RD', transcript_start=2358158, transcript_stop=2431617, transcript_strand='+', cds_id='AGAP004707-PD', cds_start=2422468, cds_stop=2422655, cds_strand='+', intron_start=None, intron_stop=None, intron_5prime_dist=None, intron_3prime_dist=None, intron_cds_5prime=None, intron_cds_3prime=None, ref_cds_start=2984, ref_cds_stop=2984, ref_intron_start=None, ref_intron_stop=None, ref_start_phase=2, ref_codon='ttA', alt_codon='ttT', codon_change='ttA/ttT', aa_pos=995, ref_aa='L', alt_aa='F', aa_change='L995F')
         2753 function calls (2738 primitive calls) in 5.819 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    5.819    5.819 <string>:1(<module

In [None]:
%%time
for effect in ann.get_effects(chrom='2L', pos=2429745, ref='A', alt='T',
                                  transcript_ids=["AGAP004707-RA"]):
       print(effect)

In [None]:
# First 10 rows of df, for a quick trial run.
# NOTE(review): in the notebook as shown, df is only assigned by the
# snp_effects cells further down, so this cell would fail on a fresh
# top-to-bottom run — confirm the intended cell order.
df_short = df[:10]

In [None]:
%%time
test = []
for row in df_short.itertuples(index=True):
    for effect in ann.get_effects(chrom='2L', pos=row.position, ref=row.ref_allele, alt=row.alt_alleles,
                                  transcript_ids=["AGAP004707-RA"]):
        test.append(effect.effect)
        

In [24]:
%%time
# Time snp_effects with third positional argument 0; the result is bound to df.
# NOTE(review): the meaning of the third argument is not visible here —
# consider passing it by keyword.
df = ag3.snp_effects('AGAP004707-RA', 'gamb_colu', 0)

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 13.6 s, sys: 1.59 s, total: 15.2 s
Wall time: 34.6 s


In [31]:
%%time
# Time snp_effects with third positional argument 1; df is overwritten.
# NOTE(review): the meaning of the third argument is not visible here —
# consider passing it by keyword.
df = ag3.snp_effects('AGAP004707-RA', 'gamb_colu', 1)

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 16 s, sys: 2.01 s, total: 18 s
Wall time: 44.6 s


In [32]:
%%time
# Time snp_effects with third positional argument 2; df is overwritten.
# NOTE(review): the meaning of the third argument is not visible here —
# consider passing it by keyword.
df = ag3.snp_effects('AGAP004707-RA', 'gamb_colu', 2)

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 18.5 s, sys: 2.37 s, total: 20.8 s
Wall time: 53.4 s


In [33]:
%%time
# Time snp_effects with third positional argument 3; df is overwritten.
# NOTE(review): the meaning of the third argument is not visible here —
# consider passing it by keyword.
df = ag3.snp_effects('AGAP004707-RA', 'gamb_colu', 3)

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 20.7 s, sys: 2.52 s, total: 23.2 s
Wall time: 1min 2s


In [34]:
%%time
# Time snp_effects with third positional argument 4; df is overwritten.
# NOTE(review): the meaning of the third argument is not visible here —
# consider passing it by keyword.
df = ag3.snp_effects('AGAP004707-RA', 'gamb_colu', 4)

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 22.3 s, sys: 2.86 s, total: 25.2 s
Wall time: 1min 15s


## cProfile

In [50]:
# Profile one snp_effects call into the file 'veff_stats_0', then print the
# report ordered by standard name ('stdname' is what the legacy key -1 means).
cProfile.run('ag3.snp_effects("AGAP004707-RA", "gamb_colu", 0)', 'veff_stats_0')
o = pstats.Stats('veff_stats_0')
o.strip_dirs()
o.sort_stats('stdname').print_stats()

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
Thu Feb 25 15:50:17 2021    veff_stats_0

         781195 function calls (777232 primitive calls) in 36.089 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    0.003    0.001 <__array_function__ internals>:2(any)
        6    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(append)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argsort)
        5    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(atleast_2d)
        3    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(bincount)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(can_cast)
       23    0.000    0.000    0.042    0.002 <__array_function__ internals>:2(concatenate)
        3    0.000    0.000    0.000    0.000 <__array_function__ internals

        1    0.000    0.000    0.000    0.000 core.py:4442(chunks_from_arrays)
        4    0.000    0.000    0.000    0.000 core.py:445(__getitem__)
       94    0.000    0.000    0.000    0.000 core.py:4465(shape)
        2    0.000    0.000    0.000    0.000 core.py:4472(<listcomp>)
   190/95    0.000    0.000    0.000    0.000 core.py:4478(deepfirst)
        4    0.015    0.004    0.063    0.016 core.py:4651(concatenate3)
      441    0.000    0.000    0.000    0.000 core.py:4680(<lambda>)
      100    0.000    0.000    0.000    0.000 core.py:4684(<genexpr>)
        1    0.000    0.000    0.000    0.000 core.py:4704(dtype)
        4    0.000    0.000    0.000    0.000 core.py:481(__init__)
        4    0.000    0.000    0.000    0.000 core.py:490(__call__)
        4    0.000    0.000    0.000    0.000 core.py:494(quote)
        4    0.000    0.000    0.000    0.000 core.py:573(get_basic_selection)
        4    0.000    0.000    0.000    0.000 core.py:733(_get_basic_selection_nd)
  

        2    0.000    0.000    0.000    0.000 shape_base.py:219(_vhstack_dispatcher)
        2    0.000    0.000    0.001    0.000 shape_base.py:223(vstack)
        5    0.000    0.000    0.000    0.000 shape_base.py:78(_atleast_2d_dispatcher)
        5    0.000    0.000    0.000    0.000 shape_base.py:82(atleast_2d)
       19    0.000    0.000    0.000    0.000 slicing.py:1258(__init__)
       19    0.000    0.000    0.000    0.000 slicing.py:1271(__hash__)
       19    0.000    0.000    0.001    0.000 slicing.py:1275(_cumsum)
       19    0.000    0.000    0.001    0.000 slicing.py:1285(cached_cumsum)
        6    0.000    0.000    0.000    0.000 slicing.py:150(<genexpr>)
        7    0.000    0.000    0.000    0.000 slicing.py:157(<genexpr>)
        3    0.000    0.000    1.500    0.500 slicing.py:168(slice_with_newaxes)
        3    0.000    0.000    0.000    0.000 slicing.py:175(<listcomp>)
        3    0.000    0.000    0.000    0.000 slicing.py:176(<listcomp>)
        3    0.000

<pstats.Stats at 0x7f3b78614550>

In [63]:
import dask
# Switch dask to the single-threaded scheduler. Called outside a `with`
# block, so the setting stays in effect for the cells that follow.
dask.config.set(scheduler='single-threaded')

<dask.config.set at 0x7f3b78634a20>

In [65]:
# Profile snp_effects (third argument 1); report sorted by internal time.
cProfile.run('ag3.snp_effects("AGAP004707-RA", "gamb_colu", 1)', sort='time')

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
         767988 function calls (764758 primitive calls) in 30.003 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1771   20.505    0.012   20.505    0.012 {method 'acquire' of '_thread.lock' objects}
    92/71    3.697    0.040    5.431    0.076 {built-in method numpy.core._multiarray_umath.implement_array_function}
        3    1.309    0.436    1.309    0.436 {built-in method _hashlib.openssl_sha1}
     19/8    1.006    0.053    4.031    0.504 slicing.py:676(posify_index)
        3    0.963    0.321    0.963    0.321 {method 'searchsorted' of 'numpy.ndarray' objects}
        3    0.406    0.135    0.406    0.135 function_base.py:1152(diff)
  971/965    0.370    0.000    0.370    0.000 {built-in method numpy.array}
        7    0.359    0.051    0.359    0.051 {method 'nonzero' of 'numpy.ndarray' objects}
        3    0.249    0.083 

      299    0.000    0.000    0.000    0.000 base.py:570(__getattr__)
        9    0.000    0.000    0.001    0.000 core.py:105(__init__)
      620    0.000    0.000    0.000    0.000 {built-in method builtins.issubclass}
      268    0.000    0.000    0.000    0.000 indexing.py:172(<genexpr>)
      4/1    0.000    0.000   15.352   15.352 ag3.py:327(snp_sites)
      356    0.000    0.000    0.000    0.000 local.py:305(default_get_id)
       32    0.000    0.000    0.000    0.000 inspect.py:2452(__init__)
      264    0.000    0.000    0.000    0.000 threading.py:1303(main_thread)
     1024    0.000    0.000    0.000    0.000 _collections_abc.py:491(<genexpr>)
        5    0.000    0.000    0.001    0.000 highlevelgraph.py:417(_from_collection)
       12    0.000    0.000    0.000    0.000 {built-in method posix.urandom}
        5    0.000    0.000    0.000    0.000 core.py:1028(blockdims_from_blockshape)
      480    0.000    0.000    0.000    0.000 utils.py:482(dispatch)
        4   

        4    0.000    0.000    0.000    0.000 optimization.py:558(<setcomp>)
       93    0.000    0.000    0.000    0.000 order.py:465(<genexpr>)
        2    0.000    0.000    0.001    0.001 managers.py:1684(create_block_manager_from_arrays)
       23    0.000    0.000    0.000    0.000 blocks.py:135(_check_ndim)
       11    0.000    0.000    0.000    0.000 blocks.py:256(make_block_same_class)
        5    0.000    0.000    0.000    0.000 base.py:1755(is_floating)
        6    0.000    0.000    0.000    0.000 missing.py:358(array_equivalent)
        8    0.000    0.000    0.000    0.000 function_base.py:4616(append)
      160    0.000    0.000    0.000    0.000 {built-in method builtins.iter}
       10    0.000    0.000    0.000    0.000 core.py:1052(<genexpr>)
        8    0.000    0.000    0.000    0.000 typing.py:1033(_abc_negative_cache_version)
       13    0.000    0.000    0.000    0.000 {method 'hexdigest' of '_hashlib.HASH' objects}
       34    0.000    0.000    0.000    0

        3    0.000    0.000    0.000    0.000 {built-in method numpy.core._multiarray_umath.normalize_axis_index}
       11    0.000    0.000    0.000    0.000 _methods.py:53(_any)
        8    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(ndim)
        3    0.000    0.000    0.000    0.000 _dtype.py:68(_construction_repr)
       27    0.000    0.000    0.000    0.000 multiarray.py:143(concatenate)
        8    0.000    0.000    0.000    0.000 inspect.py:505(_is_wrapper)
       12    0.000    0.000    0.000    0.000 {method 'count' of 'list' objects}
        4    0.000    0.000    0.000    0.000 {built-in method math.log}
       45    0.000    0.000    0.000    0.000 node.py:505(__getitem__)
        8    0.000    0.000    0.000    0.000 slicing.py:778(<listcomp>)
        4    0.000    0.000    0.000    0.000 context.py:59(__get__)
        4    0.000    0.000    0.000    0.000 itertoolz.py:249(unique)
        6    0.000    0.000    0.000    0.000 itertoolz.py:801(get

        1    0.000    0.000    0.000    0.000 merge.py:740(_maybe_restore_index_levels)
        2    0.000    0.000    0.000    0.000 construction.py:274(<listcomp>)
        2    0.000    0.000    0.000    0.000 construction.py:278(<listcomp>)
        2    0.000    0.000    0.000    0.000 construction.py:281(<listcomp>)
        6    0.000    0.000    0.000    0.000 managers.py:1689(<genexpr>)
        1    0.000    0.000    0.000    0.000 managers.py:1855(_asarray_compat)
        2    0.000    0.000    0.000    0.000 managers.py:1931(<listcomp>)
        9    0.000    0.000    0.000    0.000 indexing.py:715(<genexpr>)
       10    0.000    0.000    0.000    0.000 blocks.py:201(internal_values)
        1    0.000    0.000    0.000    0.000 blocks.py:213(get_values)
        3    0.000    0.000    0.000    0.000 blocks.py:229(fill_value)
        5    0.000    0.000    0.000    0.000 generic.py:232(attrs)
        1    0.000    0.000    0.000    0.000 range.py:329(is_monotonic_increasing)
   

In [58]:
# Print the loaded profile as-is (the report header names veff_stats_1).
# NOTE(review): p is not assigned in any cell shown here — presumably a
# pstats.Stats('veff_stats_1') from a removed cell; confirm and restore it.
p.print_stats()

Thu Feb 25 15:53:04 2021    veff_stats_1

         786310 function calls (782293 primitive calls) in 44.759 seconds

   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       15    0.000    0.000    0.000    0.000 {method 'indices' of 'slice' objects}
        9    0.000    0.000    0.000    0.000 {method '__contains__' of 'frozenset' objects}
     2667    0.001    0.000    0.001    0.000 {method 'add' of 'set' objects}
     1007    0.001    0.000    0.001    0.000 {method 'copy' of 'set' objects}
      286    0.000    0.000    0.000    0.000 {method 'discard' of 'set' objects}
      279    0.000    0.000    0.000    0.000 {method 'issubset' of 'set' objects}
      731    0.000    0.000    0.000    0.000 {method 'pop' of 'set' objects}
     2730    0.001    0.000    0.001    0.000 {method 'remove' of 'set' objects}
        7    0.000    0.000    0.000    0.000 {method 'update' of 'set' objects}
      225    0.000    0.000    0.000  

        4    0.000    0.000    0.000    0.000 blockwise.py:1178(fuse_roots)
        5    0.000    0.000    0.000    0.000 core.py:218(<listcomp>)
        5    0.000    0.000    0.000    0.000 core.py:220(<listcomp>)
        5    0.000    0.000    0.001    0.000 core.py:207(slices_from_chunks)
        9    0.000    0.000    0.000    0.000 core.py:252(<genexpr>)
        4    0.000    0.000    0.000    0.000 core.py:263(<listcomp>)
        4    0.000    0.000    0.001    0.000 core.py:226(getem)
      348    0.000    0.000    0.000    0.000 core.py:338(<lambda>)
        3    0.000    0.000    0.044    0.015 core.py:290(_concatenate2)
       10    0.000    0.000    0.000    0.000 core.py:1052(<genexpr>)
        5    0.000    0.000    0.000    0.000 core.py:1028(blockdims_from_blockshape)
        4    0.000    0.000    0.066    0.016 core.py:1057(finalize)
       16    0.000    0.000    0.000    0.000 core.py:1122(<genexpr>)
        7    0.000    0.000    0.002    0.000 core.py:1109(__new__

        4    0.000    0.000    0.000    0.000 concat.py:525(_combine_concat_plans)
       10    0.000    0.000    0.000    0.000 managers.py:138(<listcomp>)
       10    0.000    0.000    0.000    0.000 managers.py:132(__init__)
        4    0.000    0.000    0.000    0.000 managers.py:156(from_blocks)
       10    0.000    0.000    0.000    0.000 managers.py:163(blknos)
       10    0.000    0.000    0.000    0.000 managers.py:179(blklocs)
       60    0.000    0.000    0.000    0.000 managers.py:214(<genexpr>)
       20    0.000    0.000    0.000    0.000 managers.py:212(shape)
       33    0.000    0.000    0.000    0.000 managers.py:216(ndim)
        3    0.000    0.000    0.000    0.000 managers.py:220(set_axis)
        6    0.000    0.000    0.000    0.000 generic.py:5097(__finalize__)
        6    0.000    0.000    0.000    0.000 generic.py:5125(__getattr__)
       19    0.000    0.000    0.001    0.000 generic.py:5143(__setattr__)
        2    0.000    0.000    0.004    0.002 g

       64    0.000    0.000    0.000    0.000 common.py:188(<lambda>)
       64    0.000    0.000    0.000    0.000 common.py:183(classes_and_not_datetimelike)
       36    0.000    0.000    0.000    0.000 common.py:194(is_object_dtype)
       18    0.000    0.000    0.000    0.000 common.py:224(is_sparse)
        4    0.000    0.000    0.000    0.000 common.py:348(is_datetime64_dtype)
       24    0.000    0.000    0.000    0.000 common.py:381(is_datetime64tz_dtype)
       64    0.000    0.000    0.000    0.000 common.py:422(is_timedelta64_dtype)
       76    0.000    0.000    0.000    0.000 common.py:456(is_period_dtype)
       76    0.000    0.000    0.000    0.000 common.py:492(is_interval_dtype)
       82    0.000    0.000    0.000    0.000 common.py:530(is_categorical_dtype)
        3    0.000    0.000    0.000    0.000 common.py:595(condition)
       12    0.000    0.000    0.000    0.000 common.py:603(<genexpr>)
        3    0.000    0.000    0.000    0.000 common.py:598(is_exc

        3    0.000    0.000    0.000    0.000 {method 'digest' of '_hashlib.HASH' objects}
       11    0.000    0.000    0.000    0.000 {method 'hexdigest' of '_hashlib.HASH' objects}
        8    0.000    0.000    0.000    0.000 inspect.py:505(_is_wrapper)
        8    0.000    0.000    0.000    0.000 inspect.py:485(unwrap)
        8    0.000    0.000    0.000    0.000 inspect.py:2102(_signature_from_function)
        8    0.000    0.000    0.000    0.000 inspect.py:2183(_signature_from_callable)
       32    0.000    0.000    0.000    0.000 inspect.py:2452(__init__)
       32    0.000    0.000    0.000    0.000 inspect.py:2502(name)
       40    0.000    0.000    0.000    0.000 inspect.py:2781(<genexpr>)
        8    0.000    0.000    0.000    0.000 inspect.py:2732(__init__)
        8    0.000    0.000    0.000    0.000 inspect.py:2811(from_callable)
        8    0.000    0.000    0.000    0.000 inspect.py:2817(parameters)
        8    0.000    0.000    0.000    0.000 inspect.py:306

<pstats.Stats at 0x7f3b94773cf8>

In [57]:
# Strip directory prefixes, then print the profile ordered by standard name
# ('stdname' is what the legacy key -1 means).
p.strip_dirs()
p.sort_stats('stdname').print_stats()

<pstats.Stats at 0x7f3b94773cf8>

In [51]:
# Profile one snp_effects call into the file 'veff_stats_2', then print the
# report ordered by standard name ('stdname' is what the legacy key -1 means).
cProfile.run('ag3.snp_effects("AGAP004707-RA", "gamb_colu", 2)', 'veff_stats_2')
q = pstats.Stats('veff_stats_2')
q.strip_dirs()
q.sort_stats('stdname').print_stats()

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
Thu Feb 25 15:51:13 2021    veff_stats_2

         789464 function calls (785420 primitive calls) in 55.337 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    0.003    0.001 <__array_function__ internals>:2(any)
        8    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(append)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argsort)
        6    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(atleast_2d)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(bincount)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(can_cast)
       27    0.000    0.000    0.041    0.002 <__array_function__ internals>:2(concatenate)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals

       14    0.000    0.000   19.216    1.373 core.py:733(_get_basic_selection_nd)
       16    0.000    0.000    0.000    0.000 core.py:8(ishashable)
    32/16    0.000    0.000    0.000    0.000 core.py:86(_execute_task)
       10    0.000    0.000    0.000    0.000 coroutines.py:270(iscoroutine)
        4    0.000    0.000    0.000    0.000 dicttoolz.py:10(_get_factory)
        4    0.000    0.000    0.000    0.000 dicttoolz.py:18(merge)
       57    0.000    0.000    0.000    0.000 dtypes.py:1119(is_dtype)
       57    0.000    0.000    0.000    0.000 dtypes.py:906(is_dtype)
       32    0.000    0.000    0.000    0.000 enum.py:267(__call__)
       32    0.000    0.000    0.000    0.000 enum.py:517(__new__)
       10    0.000    0.000    0.000    0.000 events.py:104(__init__)
        1    0.000    0.000    0.001    0.001 frame.py:1017(itertuples)
        4    0.000    0.000    0.000    0.000 frame.py:1088(<genexpr>)
        6    0.000    0.000    0.000    0.000 frame.py:1099(__len_

        7    0.000    0.000    0.000    0.000 series.py:427(dtype)
        9    0.000    0.000    0.000    0.000 series.py:442(name)
        6    0.000    0.000    0.000    0.000 series.py:492(name)
       10    0.000    0.000    0.000    0.000 series.py:540(_values)
        1    0.000    0.000    0.000    0.000 series.py:574(array)
        2    0.000    0.000    0.000    0.000 series.py:595(__len__)
        2    0.000    0.000    0.000    0.000 shape_base.py:208(_arrays_for_stack_dispatcher)
        2    0.000    0.000    0.000    0.000 shape_base.py:219(_vhstack_dispatcher)
        2    0.000    0.000    0.001    0.000 shape_base.py:223(vstack)
        6    0.000    0.000    0.000    0.000 shape_base.py:78(_atleast_2d_dispatcher)
        6    0.000    0.000    0.000    0.000 shape_base.py:82(atleast_2d)
       19    0.000    0.000    0.000    0.000 slicing.py:1258(__init__)
       19    0.000    0.000    0.000    0.000 slicing.py:1271(__hash__)
       19    0.000    0.000    0.001   

<pstats.Stats at 0x7f3b94756898>

In [52]:
# Profile one snp_effects call into the file 'veff_stats_3', then print the
# report ordered by standard name ('stdname' is what the legacy key -1 means).
# Note that q is rebound here.
cProfile.run('ag3.snp_effects("AGAP004707-RA", "gamb_colu", 3)', 'veff_stats_3')
q = pstats.Stats('veff_stats_3')
q.strip_dirs()
q.sort_stats('stdname').print_stats()

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
Thu Feb 25 15:52:19 2021    veff_stats_3

         793464 function calls (789393 primitive calls) in 65.315 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    0.003    0.001 <__array_function__ internals>:2(any)
        8    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(append)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argsort)
        6    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(atleast_2d)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(bincount)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(can_cast)
       27    0.000    0.000    0.045    0.002 <__array_function__ internals>:2(concatenate)
        4    0.000    0.000    0.000    0.000 <__array_function__ internals

       94    0.000    0.000    0.000    0.000 core.py:4465(shape)
        2    0.000    0.000    0.000    0.000 core.py:4472(<listcomp>)
   190/95    0.000    0.000    0.000    0.000 core.py:4478(deepfirst)
        4    0.016    0.004    0.064    0.016 core.py:4651(concatenate3)
      441    0.000    0.000    0.000    0.000 core.py:4680(<lambda>)
      100    0.000    0.000    0.000    0.000 core.py:4684(<genexpr>)
        1    0.000    0.000    0.000    0.000 core.py:4704(dtype)
        4    0.000    0.000    0.000    0.000 core.py:481(__init__)
        4    0.000    0.000    0.000    0.000 core.py:490(__call__)
        4    0.000    0.000    0.000    0.000 core.py:494(quote)
       19    0.000    0.000   29.705    1.563 core.py:573(get_basic_selection)
       19    0.000    0.000   29.705    1.563 core.py:733(_get_basic_selection_nd)
       16    0.000    0.000    0.000    0.000 core.py:8(ishashable)
    32/16    0.000    0.000    0.000    0.000 core.py:86(_execute_task)
       15   

        3    0.000    0.000    0.000    0.000 range.py:214(start)
        3    0.000    0.000    0.000    0.000 range.py:237(stop)
        3    0.000    0.000    0.000    0.000 range.py:260(step)
        7    0.000    0.000    0.000    0.000 range.py:320(dtype)
        3    0.000    0.000    0.000    0.000 range.py:324(is_unique)
        1    0.000    0.000    0.000    0.000 range.py:329(is_monotonic_increasing)
        3    0.000    0.000    0.000    0.000 range.py:393(_shallow_copy)
        1    0.000    0.000    0.000    0.000 range.py:404(copy)
        2    0.000    0.000    0.000    0.000 range.py:452(equals)
        1    0.000    0.000    0.002    0.002 range.py:626(join)
       33    0.000    0.000    0.000    0.000 range.py:687(__len__)
        1    0.000    0.000    0.000    0.000 range.py:697(__getitem__)
        3    0.000    0.000    0.000    0.000 range.py:86(__new__)
        8    0.000    0.000    0.000    0.000 re.py:231(compile)
        8    0.000    0.000    0.000    0

<pstats.Stats at 0x7f3b78e9da20>

In [13]:
%%time
# Time snp_effects called with only the two leading arguments.
df = ag3.snp_effects("AGAP004707-RA", "gamb_colu")

transcript : AGAP004707-RA
chromosome : 2L 
start : 2358158
stop : 2431617
strand : +
CPU times: user 1min 25s, sys: 1.63 s, total: 1min 26s
Wall time: 1min 55s


In [15]:
# Preview the first 10 rows of the effects table.
df.head(10)

Unnamed: 0,position,ref_allele,alt_alleles,effect
0,2358158,A,C,START_LOST
1,2358158,A,T,START_LOST
2,2358158,A,G,START_LOST
3,2358159,T,A,NON_SYNONYMOUS_CODING
4,2358159,T,C,NON_SYNONYMOUS_CODING
5,2358159,T,G,NON_SYNONYMOUS_CODING
6,2358160,G,A,NON_SYNONYMOUS_CODING
7,2358160,G,C,NON_SYNONYMOUS_CODING
8,2358160,G,T,NON_SYNONYMOUS_CODING
9,2358161,A,C,NON_SYNONYMOUS_CODING
