# Extract mutations in VGSC

This notebook extracts data on all mutations in the VGSC gene.

## Setup

In [1]:
%run setup.ipynb

In [2]:
# download gene annotations from vectorbase
# (--no-clobber makes wget skip the download when the output file already
# exists, so this cell can be re-run without fetching the file again)
!wget \
    --no-clobber \
    -O ../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz \
    https://www.vectorbase.org/download/anopheles-gambiae-pestbasefeaturesagamp44gff3gz


File `../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz' already there; not retrieving.


In [3]:
# download the Davies et al. (2007) gene models
# (--no-clobber makes wget skip the download when the output file already
# exists, so this cell can be re-run without fetching the file again)
!wget \
    --no-clobber \
    -O ../data/davies_vgsc_model_20170125.gff3 \
    http://alimanfoo.github.io/assets/davies_vgsc_model_20170125.gff3


File `../data/davies_vgsc_model_20170125.gff3' already there; not retrieving.


In [4]:
# read the VectorBase GFF3 (keeping the ID and Parent attributes) and convert
# the resulting feature table straight to a pandas DataFrame
geneset_agamp44 = geneset_to_pandas(
    allel.FeatureTable.from_gff3(
        '../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz',
        attributes=['ID', 'Parent'],
    )
)
geneset_agamp44.head()

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent
0,2L,VectorBase,chromosome,1,49364325,-1,.,-1,2L,.
1,2L,VectorBase,gene,157348,186936,-1,-,-1,AGAP004677,.
2,2L,VectorBase,mRNA,157348,181305,-1,-,-1,AGAP004677-RA,AGAP004677
3,2L,VectorBase,three_prime_UTR,157348,157495,-1,-,-1,.,AGAP004677-RA
4,2L,VectorBase,exon,157348,157623,-1,-,-1,.,AGAP004677-RA


In [5]:
# subset to VGSC
region_vgsc = SeqFeature('2L', 2358158, 2431617)
geneset_agamp44_vgsc = geneset_agamp44.query(region_vgsc.query).copy()
# replace CDS IDs as not informative; assign through .loc rather than through
# the array returned by `.values`, which is not guaranteed to write back to
# the DataFrame (and is read-only when pandas Copy-on-Write is enabled)
geneset_agamp44_vgsc.loc[geneset_agamp44_vgsc['type'] == 'CDS', 'ID'] = ''
geneset_agamp44_vgsc['type'].value_counts()

CDS     93
exon    93
mRNA     3
gene     1
Name: type, dtype: int64

In [6]:
# read the Davies et al. GFF3 (keeping the ID and Parent attributes) and
# convert the resulting feature table straight to a pandas DataFrame
geneset_davies = geneset_to_pandas(
    allel.FeatureTable.from_gff3(
        '../data/davies_vgsc_model_20170125.gff3',
        attributes=['ID', 'Parent'],
    )
)
geneset_davies.head()

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent
0,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C1N2,AGAP004707
1,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C3N2,AGAP004707
2,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C5N2,AGAP004707
3,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C7N2,AGAP004707
4,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C8N2,AGAP004707


In [7]:
# stack the VectorBase VGSC features on top of the Davies features
geneset_vgsc_combined = pandas.concat(
    [
        geneset_agamp44_vgsc,
        geneset_davies,
    ]
)
# show the transcripts (mRNA features) from both sources
geneset_vgsc_combined[geneset_vgsc_combined['type'] == 'mRNA']

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent
666,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RA,AGAP004707
729,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RB,AGAP004707
792,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RC,AGAP004707
0,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C1N2,AGAP004707
1,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C3N2,AGAP004707
2,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C5N2,AGAP004707
3,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C7N2,AGAP004707
4,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C8N2,AGAP004707
5,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C10N2,AGAP004707
6,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C11N2,AGAP004707


In [8]:
# build a variant effect annotator for chromosome arm 2L that knows about both
# the VectorBase and the Davies gene models
annotator = veff.Annotator(
    seqid='2L',
    fasta_path=phase1_ar3.genome_fn,
    gff3_path=[
        '../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz',
        '../data/davies_vgsc_model_20170125.gff3',
    ],
)
annotator

<veff.Annotator at 0x7f356b147438>

In [10]:
# collect the feature ID of every child of the VGSC gene (AGAP004707)
transcript_ids = [
    child.feature_id
    for child in annotator.get_children('AGAP004707')
]
transcript_ids

['AGAP004707-RA',
 'AGAP004707-RB',
 'AGAP004707-RC',
 'Davies-C1N2',
 'Davies-C3N2',
 'Davies-C5N2',
 'Davies-C7N2',
 'Davies-C8N2',
 'Davies-C10N2',
 'Davies-C11N2',
 'Davies-C1N9',
 'Davies-C8N9',
 'Davies-C1N9ck']

In [46]:
# tabulate Davies exons: take the CDS features, collapse features sharing the
# same (start, end) into a single row, and prefix the column names with 'exon'
tbl_davies_exons = (
    etl
    .fromdataframe(geneset_davies)
    .eq('type', 'CDS')
    .cutout('Parent', 'source', 'type', 'score', 'strand', 'phase')
    .merge(key=('start', 'end'))
    .rename({
        'seqid': 'exon_seqid',
        'ID': 'exon',
        'start': 'exon_start',
        'end': 'exon_end',
    })
    .movefield('exon_seqid', 0)
)
tbl_davies_exons.displayall()

0|exon_seqid,1|exon_start,2|exon_end,3|exon
2L,2358158,2358304,1
2L,2359640,2359672,2j
2L,2361989,2362144,3
2L,2381065,2381270,4
2L,2382270,2382398,5
2L,2385694,2385785,6
2L,2390129,2390341,7
2L,2390425,2390485,8
2L,2390594,2390738,9
2L,2391156,2391320,10


## Extract table of variants

In [18]:
# list the names of the variant fields stored for 2L
print(*phase1_ar31.callset['2L/variants'], sep=', ')

ABHet, ABHom, AC, AF, ALT, AN, ANN, Accessible, BaseCounts, BaseQRankSum, CHROM, Coverage, CoverageMQ0, DP, DS, Dels, FILTER_FS, FILTER_HRun, FILTER_HighCoverage, FILTER_HighMQ0, FILTER_LowCoverage, FILTER_LowMQ, FILTER_LowQual, FILTER_NoCoverage, FILTER_PASS, FILTER_QD, FILTER_ReadPosRankSum, FILTER_RefN, FILTER_RepeatDUST, FS, HRun, HW, HaplotypeScore, HighCoverage, HighMQ0, InbreedingCoeff, LOF, LowCoverage, LowMQ, LowPairing, MLEAC, MLEAF, MQ, MQ0, MQRankSum, NDA, NMD, NoCoverage, OND, POS, QD, QUAL, REF, RPA, RU, ReadPosRankSum, RefMasked, RefN, RepeatDUST, RepeatMasker, RepeatTRF, STR, VariantType, is_snp, num_alleles, svlen


In [21]:
# list the sub-fields of the structured SNPEFF ANN array
print(*phase1_ar31.callset['2L/variants/ANN'].dtype.names, sep=', ')

Allele, Annotation, Annotation_Impact, Gene_Name, Gene_ID, Feature_Type, Feature_ID, Transcript_BioType, Rank, HGVS_c, HGVS_p, cDNA_pos, cDNA_length, CDS_pos, CDS_length, AA_pos, AA_length, Distance


In [62]:
def tabulate_variants(callset, snpeff, seqid, start, end):
    """Build a table of variants for a given callset and genome region.

    Parameters
    ----------
    callset : mapping
        Callset hierarchy; ``callset[seqid]['variants']`` must provide the
        arrays named in `allele_fields` and `site_fields` below.
    snpeff : mapping
        Hierarchy providing ``snpeff[seqid]['variants']['ANN']``, a structured
        array with the fields named in `ann_fields` below. It is sliced with
        the same index range as the callset arrays.
    seqid : str
        Key of the chromosome/contig within `callset` and `snpeff`.
    start, end : int
        Region bounds, passed to ``allel.SortedIndex.locate_range``.

    Returns
    -------
    petl table
        One row per alternate allele. ``ALT`` and ``AC`` hold the value for
        that allele, ``ALTIX`` is the zero-based index of the allele among the
        alternates, the SNPEFF columns are prefixed ``SNPEFF_`` and are only
        populated for the first alternate allele, and ``check_allele`` records
        whether the SNPEFF allele (when present) matches ``ALT``.

    """

    variants = callset[seqid]['variants']
    ann = snpeff[seqid]['variants']['ANN']
    pos = allel.SortedIndex(variants['POS'])
    loc = pos.locate_range(start, end)

    # fields emitted first for every allele row; ALT and AC are indexed by
    # alternate allele, and the ALTIX column is inserted right after them
    allele_fields = [
        'CHROM',
        'POS',
        'num_alleles',
        'REF',
        'ALT',
        'AC',
    ]
    # fields copied unchanged onto every allele row of a variant
    site_fields = [
        'FILTER_PASS',
        'NoCoverage',
        'LowCoverage',
        'HighCoverage',
        'LowMQ',
        'HighMQ0',
        'RepeatDUST',
        'RepeatMasker',
        'RepeatTRF',
        'FS',
        'HRun',
        'QD',
        'ReadPosRankSum',
    ]
    variants_fields = allele_fields + site_fields
    ann_fields = ['Allele', 'Annotation', 'HGVS_c', 'HGVS_p', 'Feature_ID']
    # fields holding bytes values that are decoded to str
    text_fields = ['CHROM', 'REF', 'ALT'] + ann_fields

    # slice the annotation array once, rather than once per annotation field
    ann_loc = ann[loc]
    cols = [variants[f][loc] for f in variants_fields] + [ann_loc[f] for f in ann_fields]

    def split_alleles(row):
        """Yield one output row per alternate allele of a variant row."""
        for i in range(row.num_alleles - 1):
            # break down alleles
            out = [
                row['CHROM'],
                row['POS'],
                row['num_alleles'],
                row['REF'],
                row['ALT'][i],
                row['AC'][i],
                i,
            ]
            # add in remaining variant annotations
            out += [row[f] for f in site_fields]
            # SNPEFF annotation only applies to first allele
            if i == 0:
                out += [row[f] for f in ann_fields]
            else:
                out += [None] * len(ann_fields)
            yield out

    def decode_ascii(v):
        """Decode a bytes value; pass through the None placeholders."""
        # alleles other than the first carry None in the SNPEFF columns;
        # handle that here instead of relying on the conversion failing
        if v is None:
            return None
        return str(v, 'ascii')

    tbl = (
        etl
        .fromcolumns(cols, header=variants_fields + ann_fields)
        .rowmapmany(split_alleles, header=allele_fields + ['ALTIX'] + site_fields + ann_fields)
        .convert(text_fields, decode_ascii)
        .rename({f: 'SNPEFF_' + f for f in ann_fields})
        .addfield('check_allele', lambda row: row['SNPEFF_Allele'] is None or row['SNPEFF_Allele'] == row['ALT'])
    )

    return tbl

In [63]:
# build a table of variants from phase 1
# (the phase 1 callset also holds the SNPEFF ANN field among its variant
# fields, so it is passed as both the `callset` and the `snpeff` argument;
# `*region_vgsc` supplies the remaining seqid, start and end arguments)
tbl_variants_phase1 = tabulate_variants(phase1_ar31.callset, phase1_ar31.callset, *region_vgsc)
tbl_variants_phase1

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele
2L,2358254,2,G,A,1,0,True,0,0,10,0,0,False,False,False,9.8672,1,17.547,-0.049988,A,missense_variant,n.97G>A,p.Asp33Asn,AGAP004707-RA,True
2L,2358316,2,T,G,73,0,True,0,0,15,0,0,False,False,False,2.4844,0,16.438,1.4219,G,intron_variant,n.147+12T>G,.,AGAP004707-RA,True
2L,2358328,2,T,C,2,0,True,0,0,14,0,0,False,False,False,2.7363,0,16.062,-0.646,C,intron_variant,n.147+24T>C,.,AGAP004707-RA,True
2L,2358353,2,C,T,1,0,True,0,1,15,0,0,False,False,False,1.9512,0,9.8594,1.1582,T,intron_variant,n.147+49C>T,.,AGAP004707-RA,True
2L,2358405,2,T,A,1,0,True,0,6,14,0,0,False,False,False,20.844,1,10.859,1.1562,A,intron_variant,n.147+101T>A,.,AGAP004707-RA,True


## Annotate effects for all transcripts

In [64]:
# effect classes reported for variants falling within coding sequence
cds_effects = ['NON_SYNONYMOUS_CODING', 'SYNONYMOUS_CODING']
# effect classes reported for variants falling within introns
intron_effects = ['INTRONIC', 'SPLICE_CORE', 'SPLICE_REGION']
# all effect classes retained when annotating variants
selected_effects = [*cds_effects, *intron_effects]

In [65]:
def lpop(l, default=None):
    """Return the first item of a sequence, or `default` if it is empty.

    Despite the name, nothing is removed: `l` is left unmodified.

    """
    try:
        first = l[0]
    except IndexError:
        return default
    else:
        return first


In [66]:
def transcript_effect(transcript_id):
    """Make a row function summarising a variant's effect on one transcript.

    The returned function reads the row's ``VEFF`` list and picks the first
    effect whose ``transcript_id`` matches. It returns:

    - ``(effect, aa_change)`` for an effect in `cds_effects`;
    - ``(effect, intron_cds_5prime, intron_5prime_dist, intron_cds_3prime,
      intron_3prime_dist)`` for an effect in `intron_effects`;
    - ``None`` if there is no matching effect or it is of any other class.

    """

    def effect_for_row(row):
        matching = [eff for eff in row.VEFF if eff.transcript_id == transcript_id]
        e = lpop(matching)
        if not e:
            return None
        if e.effect in cds_effects:
            return (e.effect, e.aa_change)
        if e.effect in intron_effects:
            return (
                e.effect,
                e.intron_cds_5prime,
                e.intron_5prime_dist,
                e.intron_cds_3prime,
                e.intron_3prime_dist,
            )
        return None

    return effect_for_row


In [99]:
def add_transcript_effects(tbl, transcript_ids):
    """Add one column per transcript holding the variant's effect on it.

    Each column is named after the transcript ID and computed from the row's
    ``VEFF`` field via `transcript_effect`. Looping over `transcript_ids`
    (rather than indexing a fixed number of positions) keeps the columns in
    step with however many transcripts the annotator reports.

    """
    for transcript_id in transcript_ids:
        tbl = tbl.addfield(transcript_id, transcript_effect(transcript_id))
    return tbl


tbl_variants_phase1_eff = (
    add_transcript_effects(
        tbl_variants_phase1
        # join in Davies exon information
        .intervalleftjoin(
            # don't include shorter exon alternatives
            tbl_davies_exons.select('exon', lambda v: v[-1] != '-'),
            lkey='CHROM', rkey='exon_seqid', lstart='POS', rstart='exon_start', lstop='POS', rstop='exon_end', include_stop=True)
        .cutout('exon_seqid')
        # temporary field: the selected effects of the variant on all transcripts
        .addfield('VEFF', lambda row: [e for e in annotator.get_effects(chrom=row.CHROM, pos=row.POS, ref=row.REF, alt=row.ALT)
                                       if e.effect in selected_effects]),
        transcript_ids,
    )
    # the per-transcript columns have been derived, so drop the temporary field
    .cutout('VEFF')
    # normalise placeholder values to None
    .replaceall('.', None)
    .replaceall('', None)
    .cache()
)

In [100]:
# preview the annotated table (display limit of 20)
tbl_variants_phase1_eff.display(20)

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele,26|exon_start,27|exon_end,28|exon,29|AGAP004707-RA,30|AGAP004707-RB,31|AGAP004707-RC,32|Davies-C1N2,33|Davies-C3N2,34|Davies-C5N2,35|Davies-C7N2,36|Davies-C8N2,37|Davies-C10N2,38|Davies-C11N2,39|Davies-C1N9,40|Davies-C8N9,41|Davies-C1N9ck
2L,2358254,2,G,A,1,0,True,0,0,10,0,0,False,False,False,9.8672,1,17.547,-0.049988,A,missense_variant,n.97G>A,p.Asp33Asn,AGAP004707-RA,True,2358158.0,2358304.0,1.0,"('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')"
2L,2358316,2,T,G,73,0,True,0,0,15,0,0,False,False,False,2.4844,0,16.438,1.4219,G,intron_variant,n.147+12T>G,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 12, 'AGAP004707-PA', -3691)","('INTRONIC', 'AGAP004707-PB', 12, 'AGAP004707-PB', -3691)","('INTRONIC', 'AGAP004707-PC', 12, 'AGAP004707-PC', -3691)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '2j', -1324)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '2j', -1324)","('INTRONIC', '1', 12, '3', -3673)"
2L,2358328,2,T,C,2,0,True,0,0,14,0,0,False,False,False,2.7363,0,16.062,-0.646,C,intron_variant,n.147+24T>C,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 24, 'AGAP004707-PA', -3679)","('INTRONIC', 'AGAP004707-PB', 24, 'AGAP004707-PB', -3679)","('INTRONIC', 'AGAP004707-PC', 24, 'AGAP004707-PC', -3679)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '2j', -1312)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '2j', -1312)","('INTRONIC', '1', 24, '3', -3661)"
2L,2358353,2,C,T,1,0,True,0,1,15,0,0,False,False,False,1.9512,0,9.8594,1.1582,T,intron_variant,n.147+49C>T,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 49, 'AGAP004707-PA', -3654)","('INTRONIC', 'AGAP004707-PB', 49, 'AGAP004707-PB', -3654)","('INTRONIC', 'AGAP004707-PC', 49, 'AGAP004707-PC', -3654)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '2j', -1287)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '2j', -1287)","('INTRONIC', '1', 49, '3', -3636)"
2L,2358405,2,T,A,1,0,True,0,6,14,0,0,False,False,False,20.844,1,10.859,1.1562,A,intron_variant,n.147+101T>A,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 101, 'AGAP004707-PA', -3602)","('INTRONIC', 'AGAP004707-PB', 101, 'AGAP004707-PB', -3602)","('INTRONIC', 'AGAP004707-PC', 101, 'AGAP004707-PC', -3602)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '2j', -1235)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '2j', -1235)","('INTRONIC', '1', 101, '3', -3584)"
2L,2358441,2,A,T,78,0,False,0,6,17,0,0,False,False,False,2.4805,1,21.703,0.94385,T,intron_variant,n.147+137A>T,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 137, 'AGAP004707-PA', -3566)","('INTRONIC', 'AGAP004707-PB', 137, 'AGAP004707-PB', -3566)","('INTRONIC', 'AGAP004707-PC', 137, 'AGAP004707-PC', -3566)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '2j', -1199)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '3', -3548)","('INTRONIC', '1', 137, '2j', -1199)","('INTRONIC', '1', 137, '3', -3548)"
2L,2358463,2,G,T,5,0,False,0,4,16,0,0,False,False,False,22.0,0,15.211,-0.42798,T,intron_variant,n.147+159G>T,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 159, 'AGAP004707-PA', -3544)","('INTRONIC', 'AGAP004707-PB', 159, 'AGAP004707-PB', -3544)","('INTRONIC', 'AGAP004707-PC', 159, 'AGAP004707-PC', -3544)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '2j', -1177)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '3', -3526)","('INTRONIC', '1', 159, '2j', -1177)","('INTRONIC', '1', 159, '3', -3526)"
2L,2358468,2,A,C,150,0,False,0,4,17,0,0,False,False,False,1.668,0,19.812,-0.198,C,intron_variant,n.147+164A>C,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 164, 'AGAP004707-PA', -3539)","('INTRONIC', 'AGAP004707-PB', 164, 'AGAP004707-PB', -3539)","('INTRONIC', 'AGAP004707-PC', 164, 'AGAP004707-PC', -3539)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '2j', -1172)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '3', -3521)","('INTRONIC', '1', 164, '2j', -1172)","('INTRONIC', '1', 164, '3', -3521)"
2L,2358501,2,A,T,5,0,False,0,4,22,0,0,False,False,False,11.672,0,14.359,-1.2432,T,intron_variant,n.147+197A>T,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 197, 'AGAP004707-PA', -3506)","('INTRONIC', 'AGAP004707-PB', 197, 'AGAP004707-PB', -3506)","('INTRONIC', 'AGAP004707-PC', 197, 'AGAP004707-PC', -3506)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '2j', -1139)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '3', -3488)","('INTRONIC', '1', 197, '2j', -1139)","('INTRONIC', '1', 197, '3', -3488)"
2L,2358536,2,T,G,4,0,False,0,3,25,0,0,False,False,False,4.3203,1,17.234,2.2852,G,intron_variant,n.147+232T>G,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 232, 'AGAP004707-PA', -3471)","('INTRONIC', 'AGAP004707-PB', 232, 'AGAP004707-PB', -3471)","('INTRONIC', 'AGAP004707-PC', 232, 'AGAP004707-PC', -3471)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '2j', -1104)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '3', -3453)","('INTRONIC', '1', 232, '2j', -1104)","('INTRONIC', '1', 232, '3', -3453)"


## Inspect missense variants

In [101]:
def simplify_missense_effect(v):
    """Reduce an effect tuple to its amino acid change if it is missense.

    Returns the second item of `v` when `v` is non-empty and its first item is
    'NON_SYNONYMOUS_CODING'; returns an empty string for anything else
    (including None).

    """
    if not v:
        return ''
    if v[0] != 'NON_SYNONYMOUS_CODING':
        return ''
    return v[1]

    
def _highlight(is_flagged, colour='red'):
    """Make a cell styler that colours the background of flagged values."""
    css = 'background-color: ' + colour
    return lambda v: css if is_flagged(v) else ''


# per-column cell stylers: a cell is highlighted when its value meets the
# given condition
td_styles = {
    'FILTER_PASS': _highlight(lambda v: not v),
    'NoCoverage': _highlight(lambda v: v > 1),
    'LowCoverage': _highlight(lambda v: v > 76),
    'HighCoverage': _highlight(lambda v: v > 15),
    'LowMQ': _highlight(lambda v: v > 76),
    'HighMQ0': _highlight(lambda v: v > 1),
    'RepeatDUST': _highlight(lambda v: v),
    'FS': _highlight(lambda v: v > 60),
    'QD': _highlight(lambda v: v < 5),
    'ReadPosRankSum': _highlight(lambda v: v < -8),
    'HRun': _highlight(lambda v: v > 4),
    'num_alleles': _highlight(lambda v: v > 2, colour='orange'),
}


def tr_style(row):
    """Shade a row green, with opacity AC/100 capped at 1."""
    alpha = min(1, row['AC'] / 100)
    return 'background-color:rgba(0, 255, 0, %.3f)' % alpha


def _is_missense_in_any_transcript(row):
    """True if the variant is non-synonymous for at least one transcript."""
    for t in transcript_ids:
        effect = row[t]
        if effect and effect[0] == 'NON_SYNONYMOUS_CODING':
            return True
    return False


# keep the missense variants and show only the amino acid change per transcript
tbl_variants_phase1_missense = (
    tbl_variants_phase1_eff
    .select(_is_missense_in_any_transcript)
    .convert(transcript_ids, simplify_missense_effect)
)
tbl_variants_phase1_missense.displayall(td_styles=td_styles, tr_style=tr_style)

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele,26|exon_start,27|exon_end,28|exon,29|AGAP004707-RA,30|AGAP004707-RB,31|AGAP004707-RC,32|Davies-C1N2,33|Davies-C3N2,34|Davies-C5N2,35|Davies-C7N2,36|Davies-C8N2,37|Davies-C10N2,38|Davies-C11N2,39|Davies-C1N9,40|Davies-C8N9,41|Davies-C1N9ck
2L,2358254,2,G,A,1,0,True,0,0,10,0,0,False,False,False,9.8672,1,17.547,-0.049988,A,missense_variant,n.97G>A,p.Asp33Asn,AGAP004707-RA,True,2358158,2358304,1,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N
2L,2359670,2,G,A,7,0,False,1,171,1,1,0,False,False,False,8.6641,6,14.406,-0.029007,A,intron_variant,n.147+1366G>,,AGAP004707-RA,True,2359640,2359672,2j,,,,,,,,E60K,,,,E60K,
2L,2362002,2,A,T,2,0,True,0,1,3,0,0,False,False,False,0.5459,0,12.531,-0.55322,T,splice_region_variant&intron_varia,n.148-5A>T,,AGAP004707-RA,True,2361989,2362144,3,,,,D54V,D54V,D54V,D54V,D65V,D54V,D54V,D54V,D65V,D54V
2L,2362019,2,G,T,2,0,True,0,0,6,0,0,False,False,False,3.9824,0,13.641,0.7749,T,missense_variant,n.160G>T,p.Gly54Cys,AGAP004707-RA,True,2361989,2362144,3,G54C,G54C,G54C,G60C,G60C,G60C,G60C,G71C,G60C,G60C,G60C,G71C,G60C
2L,2362023,2,C,T,1,0,True,0,1,3,0,0,False,False,False,0.0,0,13.477,-1.1611,T,missense_variant,n.164C>T,p.Pro55Leu,AGAP004707-RA,True,2361989,2362144,3,P55L,P55L,P55L,P61L,P61L,P61L,P61L,P72L,P61L,P61L,P61L,P72L,P61L
2L,2390168,2,A,G,2,0,True,0,2,10,0,0,False,False,False,0.56982,1,15.219,-0.026001,G,missense_variant,n.752A>G,p.Lys251Arg,AGAP004707-RA,True,2390129,2390341,7,K251R,K251R,K251R,K257R,K214R,K257R,K257R,K268R,K257R,K257R,K257R,K268R,K257R
2L,2390177,2,G,A,198,0,True,0,3,8,0,0,False,False,False,0.12695,1,18.625,0.83496,A,missense_variant,n.761G>A,p.Arg254Lys,AGAP004707-RA,True,2390129,2390341,7,R254K,R254K,R254K,R260K,R217K,R260K,R260K,R271K,R260K,R260K,R260K,R271K,R260K
2L,2390311,2,G,A,1,0,True,0,0,10,0,0,False,False,False,0.0,3,14.07,-0.70996,A,missense_variant,n.895G>A,p.Glu299Lys,AGAP004707-RA,True,2390129,2390341,7,E299K,E299K,E299K,E305K,E262K,E305K,E305K,E316K,E305K,E305K,E305K,E316K,E305K
2L,2390448,2,G,A,6,0,True,0,0,15,0,0,False,False,False,0.71094,0,16.125,-0.65918,A,missense_variant,n.949G>A,p.Gly317Ser,AGAP004707-RA,True,2390425,2390485,8,G317S,G317S,G317S,G323S,G280S,G323S,G323S,G334S,G323S,G323S,G323S,G334S,G323S
2L,2391228,3,G,C,10,0,True,0,0,12,0,0,False,False,False,2.0352,0,14.867,-1.1777,C,missense_variant,n.1204G>C,p.Val402Leu,AGAP004707-RA,True,2391156,2391320,10,V402L,V402L,V402L,V408L,V365L,,V408L,V419L,V408L,V408L,V408L,V419L,V408L


## Inspect splice site variants

In [102]:
def simplify_intron_effect(v):
    """Reduce a splice effect tuple to the nearer flanking exon and distance.

    For a 'SPLICE_REGION' or 'SPLICE_CORE' effect tuple, returns
    ``(v[1], v[2])`` when the absolute value of ``v[2]`` is strictly smaller
    than that of ``v[4]``, otherwise ``(v[3], v[4])``. Returns an empty string
    for any other value (including None).

    """
    if not (v and v[0] in ['SPLICE_REGION', 'SPLICE_CORE']):
        return ''
    dist_5prime = math.fabs(v[2])
    dist_3prime = math.fabs(v[4])
    if dist_5prime < dist_3prime:
        return v[1], v[2]
    return v[3], v[4]

    
_RED = 'background-color: red'
_ORANGE = 'background-color: orange'

# per-column cell stylers: each maps a cell value to a CSS string, which is
# empty unless the value meets the column's condition
td_styles = dict(
    FILTER_PASS=lambda v: '' if v else _RED,
    NoCoverage=lambda v: _RED if v > 1 else '',
    LowCoverage=lambda v: _RED if v > 76 else '',
    HighCoverage=lambda v: _RED if v > 15 else '',
    LowMQ=lambda v: _RED if v > 76 else '',
    HighMQ0=lambda v: _RED if v > 1 else '',
    RepeatDUST=lambda v: _RED if v else '',
    FS=lambda v: _RED if v > 60 else '',
    QD=lambda v: _RED if v < 5 else '',
    ReadPosRankSum=lambda v: _RED if v < -8 else '',
    HRun=lambda v: _RED if v > 4 else '',
    num_alleles=lambda v: _ORANGE if v > 2 else '',
)


def tr_style(row):
    """Return a green row background whose opacity is AC/100, at most 1."""
    fraction = row['AC'] / 100
    opacity = fraction if fraction < 1 else 1
    return 'background-color:rgba(0, 255, 0, %.3f)' % opacity


def _is_splice_in_any_transcript(row):
    """True if the variant is a splice variant for at least one transcript."""
    for t in transcript_ids:
        effect = row[t]
        if effect and effect[0] in ['SPLICE_REGION', 'SPLICE_CORE']:
            return True
    return False


# keep the splice variants and show only the nearer exon and distance per
# transcript
tbl_variants_phase1_splice = (
    tbl_variants_phase1_eff
    .select(_is_splice_in_any_transcript)
    .convert(transcript_ids, simplify_intron_effect)
)
tbl_variants_phase1_splice.displayall(td_styles=td_styles, tr_style=tr_style)

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele,26|exon_start,27|exon_end,28|exon,29|AGAP004707-RA,30|AGAP004707-RB,31|AGAP004707-RC,32|Davies-C1N2,33|Davies-C3N2,34|Davies-C5N2,35|Davies-C7N2,36|Davies-C8N2,37|Davies-C10N2,38|Davies-C11N2,39|Davies-C1N9,40|Davies-C8N9,41|Davies-C1N9ck
2L,2362002,2,A,T,2,0,True,0,1,3,0,0,False,False,False,0.5459,0,12.531,-0.55322,T,splice_region_variant&intron_varia,n.148-5A>T,,AGAP004707-RA,True,2361989.0,2362144.0,3.0,"('AGAP004707-PA', -5)","('AGAP004707-PB', -5)","('AGAP004707-PC', -5)",,,,,,,,,,
2L,2362003,2,C,T,2,0,True,0,1,3,0,0,False,False,False,0.50195,0,14.062,0.024994,T,splice_region_variant&intron_varia,n.148-4C>T,,AGAP004707-RA,True,2361989.0,2362144.0,3.0,"('AGAP004707-PA', -4)","('AGAP004707-PB', -4)","('AGAP004707-PC', -4)",,,,,,,,,,
2L,2382263,2,A,G,166,0,True,0,45,1,0,0,False,False,False,5.957,0,25.375,-2.8809,G,splice_region_variant&intron_varia,n.492-7A>G,,AGAP004707-RA,True,,,,"('AGAP004707-PA', -7)","('AGAP004707-PB', -7)","('AGAP004707-PC', -7)","('5', -7)",,"('5', -7)","('5', -7)","('5', -7)","('5', -7)","('5', -7)","('5', -7)","('5', -7)","('5', -7)"
2L,2390126,2,C,T,2,0,True,0,2,11,0,0,False,False,False,3.4746,0,14.32,-1.0264,T,splice_region_variant&intron_varia,n.713-3C>T,,AGAP004707-RA,True,,,,"('AGAP004707-PA', -3)","('AGAP004707-PB', -3)","('AGAP004707-PC', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)","('7', -3)"
2L,2400176,2,A,G,1,0,True,0,0,7,0,0,False,False,False,0.0,0,22.203,0.74316,G,splice_region_variant&intron_varia,n.1572+3A>G,,AGAP004707-RA,True,,,,"('AGAP004707-PA', 3)","('AGAP004707-PB', 3)","('AGAP004707-PC', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)","('11i+', 3)"
2L,2407888,2,T,C,4,0,True,0,2,9,0,0,False,False,False,5.7578,0,16.281,-0.76416,C,splice_region_variant&intron_varia,n.2017-6T>C,,AGAP004707-RA,True,,,,"('AGAP004707-PA', -6)","('AGAP004707-PB', -6)","('AGAP004707-PC', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)","('16', -6)"
2L,2417362,2,A,G,496,0,False,5,712,0,0,0,False,False,False,63.062,1,28.844,1.251,G,splice_region_variant&intron_varia,n.2637+4A>G,,AGAP004707-RA,True,,,,"('AGAP004707-PA', 4)","('AGAP004707-PB', 4)","('AGAP004707-PC', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)","('19', 4)"
2L,2425766,2,T,A,79,0,True,0,1,9,0,0,False,False,False,9.9062,0,21.391,1.6143,A,intron_variant,n.4068+315T>,,AGAP004707-RA,True,,,,,,,,,,,,,,,,"('27k', -4)"
2L,2429868,2,C,A,2,0,True,0,0,14,0,0,False,False,False,8.5469,0,14.961,-0.014,A,splice_region_variant&intron_varia,n.4765-4C>A,,AGAP004707-RA,True,,,,"('AGAP004707-PA', -4)","('AGAP004707-PB', -4)","('AGAP004707-PC', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)","('31', -4)"


## Write out variants to file

In [103]:
# write the annotated variants twice: to a pickle file (values as they are in
# the table) and to a tab-delimited text file (effect tuples flattened to
# colon-separated strings, missing values written as 'NA')
(tbl_variants_phase1_eff
 .teepickle('../data/tbl_variants_phase1.pkl')
 # cells with no effect for a transcript hold None; pass these through
 # explicitly rather than relying on the conversion failing on them
 .convert(transcript_ids, lambda v: None if v is None else ':'.join(map(str, v)))
 .replaceall(None, 'NA')
 .totsv('../data/tbl_variants_phase1.txt')
)

In [104]:
# check the pickle file written above reads back OK
etl.frompickle('../data/tbl_variants_phase1.pkl')

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele,26|exon_start,27|exon_end,28|exon,29|AGAP004707-RA,30|AGAP004707-RB,31|AGAP004707-RC,32|Davies-C1N2,33|Davies-C3N2,34|Davies-C5N2,35|Davies-C7N2,36|Davies-C8N2,37|Davies-C10N2,38|Davies-C11N2,39|Davies-C1N9,40|Davies-C8N9,41|Davies-C1N9ck
2L,2358254,2,G,A,1,0,True,0,0,10,0,0,False,False,False,9.8672,1,17.547,-0.049988,A,missense_variant,n.97G>A,p.Asp33Asn,AGAP004707-RA,True,2358158.0,2358304.0,1.0,"('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')","('NON_SYNONYMOUS_CODING', 'D33N')"
2L,2358316,2,T,G,73,0,True,0,0,15,0,0,False,False,False,2.4844,0,16.438,1.4219,G,intron_variant,n.147+12T>G,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 12, 'AGAP004707-PA', -3691)","('INTRONIC', 'AGAP004707-PB', 12, 'AGAP004707-PB', -3691)","('INTRONIC', 'AGAP004707-PC', 12, 'AGAP004707-PC', -3691)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '2j', -1324)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '3', -3673)","('INTRONIC', '1', 12, '2j', -1324)","('INTRONIC', '1', 12, '3', -3673)"
2L,2358328,2,T,C,2,0,True,0,0,14,0,0,False,False,False,2.7363,0,16.062,-0.646,C,intron_variant,n.147+24T>C,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 24, 'AGAP004707-PA', -3679)","('INTRONIC', 'AGAP004707-PB', 24, 'AGAP004707-PB', -3679)","('INTRONIC', 'AGAP004707-PC', 24, 'AGAP004707-PC', -3679)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '2j', -1312)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '3', -3661)","('INTRONIC', '1', 24, '2j', -1312)","('INTRONIC', '1', 24, '3', -3661)"
2L,2358353,2,C,T,1,0,True,0,1,15,0,0,False,False,False,1.9512,0,9.8594,1.1582,T,intron_variant,n.147+49C>T,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 49, 'AGAP004707-PA', -3654)","('INTRONIC', 'AGAP004707-PB', 49, 'AGAP004707-PB', -3654)","('INTRONIC', 'AGAP004707-PC', 49, 'AGAP004707-PC', -3654)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '2j', -1287)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '3', -3636)","('INTRONIC', '1', 49, '2j', -1287)","('INTRONIC', '1', 49, '3', -3636)"
2L,2358405,2,T,A,1,0,True,0,6,14,0,0,False,False,False,20.844,1,10.859,1.1562,A,intron_variant,n.147+101T>A,,AGAP004707-RA,True,,,,"('INTRONIC', 'AGAP004707-PA', 101, 'AGAP004707-PA', -3602)","('INTRONIC', 'AGAP004707-PB', 101, 'AGAP004707-PB', -3602)","('INTRONIC', 'AGAP004707-PC', 101, 'AGAP004707-PC', -3602)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '2j', -1235)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '3', -3584)","('INTRONIC', '1', 101, '2j', -1235)","('INTRONIC', '1', 101, '3', -3584)"


In [105]:
# check the tab-delimited text file written above reads back OK
etl.fromtsv('../data/tbl_variants_phase1.txt')

0|CHROM,1|POS,2|num_alleles,3|REF,4|ALT,5|AC,6|ALTIX,7|FILTER_PASS,8|NoCoverage,9|LowCoverage,10|HighCoverage,11|LowMQ,12|HighMQ0,13|RepeatDUST,14|RepeatMasker,15|RepeatTRF,16|FS,17|HRun,18|QD,19|ReadPosRankSum,20|SNPEFF_Allele,21|SNPEFF_Annotation,22|SNPEFF_HGVS_c,23|SNPEFF_HGVS_p,24|SNPEFF_Feature_ID,25|check_allele,26|exon_start,27|exon_end,28|exon,29|AGAP004707-RA,30|AGAP004707-RB,31|AGAP004707-RC,32|Davies-C1N2,33|Davies-C3N2,34|Davies-C5N2,35|Davies-C7N2,36|Davies-C8N2,37|Davies-C10N2,38|Davies-C11N2,39|Davies-C1N9,40|Davies-C8N9,41|Davies-C1N9ck
2L,2358254,2,G,A,1,0,True,0,0,10,0,0,False,False,False,9.8672,1,17.547,-0.049988,A,missense_variant,n.97G>A,p.Asp33Asn,AGAP004707-RA,True,2358158.0,2358304.0,1.0,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N,NON_SYNONYMOUS_CODING:D33N
2L,2358316,2,T,G,73,0,True,0,0,15,0,0,False,False,False,2.4844,0,16.438,1.4219,G,intron_variant,n.147+12T>G,,AGAP004707-RA,True,,,,INTRONIC:AGAP004707-PA:12:AGAP004707-PA:-3691,INTRONIC:AGAP004707-PB:12:AGAP004707-PB:-3691,INTRONIC:AGAP004707-PC:12:AGAP004707-PC:-3691,INTRONIC:1:12:3:-3673,INTRONIC:1:12:3:-3673,INTRONIC:1:12:3:-3673,INTRONIC:1:12:3:-3673,INTRONIC:1:12:2j:-1324,INTRONIC:1:12:3:-3673,INTRONIC:1:12:3:-3673,INTRONIC:1:12:3:-3673,INTRONIC:1:12:2j:-1324,INTRONIC:1:12:3:-3673
2L,2358328,2,T,C,2,0,True,0,0,14,0,0,False,False,False,2.7363,0,16.062,-0.646,C,intron_variant,n.147+24T>C,,AGAP004707-RA,True,,,,INTRONIC:AGAP004707-PA:24:AGAP004707-PA:-3679,INTRONIC:AGAP004707-PB:24:AGAP004707-PB:-3679,INTRONIC:AGAP004707-PC:24:AGAP004707-PC:-3679,INTRONIC:1:24:3:-3661,INTRONIC:1:24:3:-3661,INTRONIC:1:24:3:-3661,INTRONIC:1:24:3:-3661,INTRONIC:1:24:2j:-1312,INTRONIC:1:24:3:-3661,INTRONIC:1:24:3:-3661,INTRONIC:1:24:3:-3661,INTRONIC:1:24:2j:-1312,INTRONIC:1:24:3:-3661
2L,2358353,2,C,T,1,0,True,0,1,15,0,0,False,False,False,1.9512,0,9.8594,1.1582,T,intron_variant,n.147+49C>T,,AGAP004707-RA,True,,,,INTRONIC:AGAP004707-PA:49:AGAP004707-PA:-3654,INTRONIC:AGAP004707-PB:49:AGAP004707-PB:-3654,INTRONIC:AGAP004707-PC:49:AGAP004707-PC:-3654,INTRONIC:1:49:3:-3636,INTRONIC:1:49:3:-3636,INTRONIC:1:49:3:-3636,INTRONIC:1:49:3:-3636,INTRONIC:1:49:2j:-1287,INTRONIC:1:49:3:-3636,INTRONIC:1:49:3:-3636,INTRONIC:1:49:3:-3636,INTRONIC:1:49:2j:-1287,INTRONIC:1:49:3:-3636
2L,2358405,2,T,A,1,0,True,0,6,14,0,0,False,False,False,20.844,1,10.859,1.1562,A,intron_variant,n.147+101T>A,,AGAP004707-RA,True,,,,INTRONIC:AGAP004707-PA:101:AGAP004707-PA:-3602,INTRONIC:AGAP004707-PB:101:AGAP004707-PB:-3602,INTRONIC:AGAP004707-PC:101:AGAP004707-PC:-3602,INTRONIC:1:101:3:-3584,INTRONIC:1:101:3:-3584,INTRONIC:1:101:3:-3584,INTRONIC:1:101:3:-3584,INTRONIC:1:101:2j:-1235,INTRONIC:1:101:3:-3584,INTRONIC:1:101:3:-3584,INTRONIC:1:101:3:-3584,INTRONIC:1:101:2j:-1235,INTRONIC:1:101:3:-3584
