In [38]:
from BCBio import GFF
import operator
from Bio.SeqRecord import SeqRecord
import glob, os
from Bio import SeqIO
import time
import pysam

import gffutils
import pyfaidx
from pysam import VariantFile
from Bio.Seq import Seq

In [39]:
sourcebase= '/mnt/test_data/hepavac34/rna_benign/spladdrout'
#sourcebase = '/tmp/'
reffile = '/mnt/test_data/refs/GRCh37.primary_assembly.genome.fa'
fileending = 'gff3'
vcffile= '/tmp/vcf.vcf.gz' #must be indexed!

#db = gffutils.create_db('/tmp/ensg228794.gff', ':memory:')
fasta = pyfaidx.Fasta('/mnt/test_data/refs/GRCh37.primary_assembly.genome.fa')
vcffile='/tmp/vcf.vcf.gz'

outfile = '/tmp/predicted_exons.fa'
outfile2 = '/tmp/predicted_genes.fa'  
vcfrecords=VariantFile(vcffile)


In [40]:
#Getting all gff-files in base directory
def get_gff_files(sourcebase):
    print "Checking for GFF Files in directory..."
    gfffiles = []
    os.chdir(sourcebase)
    for file in glob.glob("*.%s" %fileending):
        gfffiles.append(file)
    print "Found %i file(s) in %s ending with %s." %(len(gfffiles), sourcebase, fileending)
    return gfffiles

In [62]:
def get_gene_sequences(gene,db):
    sequences = []
    num_mod=0
    #print gene
    for i in db.children(gene,featuretype='exon'):
        events=check_feature_for_vcfevent(i)
        if len(events) == 0:
            rec = Seq(i.sequence(fasta)).translate(to_stop=True)
            sequences.append(SeqRecord(rec,i.id,"",""))
        else:
            num_mod+=1
        #    sequences.append(Seq.translate(i.sequence(fasta),to_stop=True))
            rec = get_modified_sequence(i.sequence(fasta),events,i.start)
            sequences.append(SeqRecord(rec,i.id,"",""))
            #sequences.append(get_modified_sequence(i.sequence(fasta),events,i.start))
            #return get_modified_sequence(i.sequence(fasta),events)
    return sequences , num_mod 

In [63]:
def check_feature_for_vcfevent(feature):
    vcfevents = []
    if feature.chrom not in vcfrecords.header.contigs:
        print "Chrom not found"
        return vcfevents
    for rec in vcfrecords.fetch(feature.chrom,feature.start,feature.stop):
        vcfevents.append(rec)
       #a=rec
    if len(vcfevents) == 0:
        return vcfevents
        print "no events in range detected (%s - %i on %i)"%(feature.chrom,feature.start,feature.stop)
    else:
        print "%i events in range detected (%s - %i on %i)"%(len(vcfevents),feature.chrom,feature.start,feature.stop)
        
    return vcfevents

In [64]:
def get_modified_sequence(in_seq,events,offset):
    print "Personalisizing sequence len:%i with %i events"%(len(in_seq),len(events))
    lengthchange=0
    for event in events:
        eventpos=event.start-offset
        print "Eventstart %i, offset: %i -> pos:%i"%(event.start,offset,eventpos)
        print "Char at  %i: %s (region +-3: %s)" %(eventpos,in_seq[eventpos],in_seq[eventpos-3:eventpos+3])
        alleles=event.alleles
        l = list(in_seq)
        
        if is_snp(alleles):
          #  print "Found SNP substituting %s with %s" %(l[eventpos],alleles[1])
            if l[eventpos]!= alleles[0]:
                print "warning SNP missmatch"
            l[eventpos]=alleles[1]
        if is_del(alleles):
            l[eventpos]=alleles[1] #TODO falsch!
            lengthchange-=len(alleles[1])-len(alleles[0])
            print "Found deletion: deleting %s from sequences"%alleles[0]
            print alleles
            print event
        if is_insert(alleles):
            l.insert(eventpos,alleles[1])
            lengthchange+=len(alleles[0])-len(alleles[1])
            print "found insert: inserting %s into sequence at pos %i"%alleles[1],eventpos
            print event
    new_seq=''.join(l)
    if (len(in_seq)-len(new_seq)) != lengthchange:
        print "Warning: length changed %i (should be %i)" %(len(in_seq)-len(new_seq),lengthchange)
    return new_seq 

In [65]:
def is_snp(e):
    if len(e[0]) == 1 and len(e[1]) == 1:
        return True
    return False

def is_insert(e):
    if len(e[0])<len(e[1]):
        return True
    return False

def is_del(e):
    if len(e[0])>len(e[1]):
        return True
    return False

In [66]:
def run():
    gfffiles = get_gff_files(sourcebase)
    
    for infile in gfffiles:
        total_genes=0
        total_exons=0
        total_genes_wrote=0
        total_exons_wrote=0
        total_mod=0
        db = gffutils.create_db(infile, ':memory:')
        for gene in db.features_of_type('gene'):
            total_genes+=1
            seqs,num_mod=get_gene_sequences(gene,db)
            total_exons+=len(seqs)
            total_mod+=num_mod
            #a=seqs
           # return seqs
            total_exons_wrote+=write_records(seqs,'exons')
       # total_genes_wrote+=write_records(seqs,'genes')
        print "processed (and wrote): %s with %i(%i) genes and %i(%i) exons (%i modified))" %(infile,total_genes,total_genes_wrote,total_exons,total_exons_wrote,total_mod)
        
    #ref_recs = load_reffile(reffile)
    
     #   records, records_until_stop = get_records(infile,ref_recs)
      #  write_records(records,records_until_stop,os.path.basename(infile))
        

In [71]:
def write_records(records,type):
    #translate records
    
    if type == 'exons':
        with open(outfile, "w") as out_handle:
            #print(records[1])
            return SeqIO.write(records, out_handle, "fasta")
    if type == 'genes':
        #s=[]
        #for seq in records:
        #    s.append(str(seq.seq))
        #gene=SeqIO.SeqRecord(''.join(s),id=s[0].id)
        print "gene wrting goes here"
       # with open(outfile, "w") as out_handle:
        #    return SeqIO.write(gene, out_handle, "fasta")    
 

In [72]:
a=run()

Checking for GFF Files in directory...
Found 5 file(s) in /mnt/test_data/hepavac34/rna_benign/spladdrout ending with gff3.
Chrom not found
Chrom not found
Chrom not found
ID: exon_2
Number of features: 0
Seq('AECY', ExtendedIUPACProtein())
ID: exon_5
Number of features: 0
Seq('AR', ExtendedIUPACProtein())
ID: exon_8
Number of features: 0
Seq('ASSGLPGSTGPSPDLRSEDPHSQVLELVSSDSLNVPSEEEVYRAVLSWVKHDVD...VPR', ExtendedIUPACProtein())
ID: exon_11
Number of features: 0
Seq('VLELVSSDSLNVPSEEEVYRAVLSWVKHDVDARRQHVPR', ExtendedIUPACProtein())
ID: exon_14
Number of features: 0
Seq('AHCPHPSWCSAPAGPSWTAGFTTWRSRRPSSGGRGAATRHPH', ExtendedIUPACProtein())
ID: exon_17
Number of features: 0
Seq('GCGT', ExtendedIUPACProtein())
ID: exon_20
Number of features: 0
Seq('AVSRPARPGFTVFHHLRLSCDRPQLCRRGRRYLLLQLPEPLPGLVQDH', ExtendedIUPACProtein())
ID: exon_23
Number of features: 0
Seq('AVSRPARPGFTVFHHLRLSCDRPQLCRRGRRYLLLQLPEPLPGLVQDH', ExtendedIUPACProtein())
ID: exon_26
Number of features: 0
Seq('AGSSGMSRL', Extend

ID: exon_533
Number of features: 0
Seq('LVWLVRELVKSGVLGADGVCMTFMKQIA', ExtendedIUPACProtein())
ID: exon_536
Number of features: 0
Seq('VEMLQPKISGWQKVFWIS', ExtendedIUPACProtein())
ID: exon_539
Number of features: 0
Seq('V', ExtendedIUPACProtein())
ID: exon_542
Number of features: 0
Seq('TPSSPV', ExtendedIUPACProtein())
ID: exon_545
Number of features: 0
Seq('CGHLLGHVNQQRRVPWLCCCSPAVQGEWRWTEE', ExtendedIUPACProtein())
ID: exon_548
Number of features: 0
Seq('SKAERGHKVRLAVGSGLRPDTWERFVRRFGPLQVLETYGLTEGNVATINYTGQR...PSP', ExtendedIUPACProtein())
ID: exon_551
Number of features: 0
Seq('SKAERGHKVRLAVGSGLRPDTWERFVRRFGPLQVLETYGLTEGNVATINYTGQR...LYK', ExtendedIUPACProtein())
ID: exon_554
Number of features: 0
Seq('HIFPFSLIRYDVTTGEPIRDPQGHCMATSP', ExtendedIUPACProtein())
ID: exon_557
Number of features: 0
Seq('ESQFGTPRGTVWPHLQ', ExtendedIUPACProtein())
ID: exon_560
Number of features: 0
Seq('VEGGECGHNRGGRGLRGPRFSSGGERLWSHCA', ExtendedIUPACProtein())
ID: exon_563
Number of features: 0
Seq('LAQGKL

ID: exon_1100
Number of features: 0
Seq('VAPKFPGSV', ExtendedIUPACProtein())
ID: exon_1103
Number of features: 0
Seq('MKTLKMTEGDFSGDRGLPG', ExtendedIUPACProtein())
ID: exon_1106
Number of features: 0
Seq('ADKTVPSRLRKSTPAIGTAWAVGPTG', ExtendedIUPACProtein())
ID: exon_1109
Number of features: 0
Seq('TRNPH', ExtendedIUPACProtein())
ID: exon_1112
Number of features: 0
Seq('DQYGNYVIQHVLEHGRPEDKSKIV', ExtendedIUPACProtein())
ID: exon_1115
Number of features: 0
Seq('GCPGAAGQAPGAPP', ExtendedIUPACProtein())
ID: exon_1118
Number of features: 0
Seq('EVEGHTEWSREYGQPPALSQEVPNPKEGRNWKLGTASSFNK', ExtendedIUPACProtein())
ID: exon_1121
Number of features: 0
Seq('VESCEPQPHLTPAHLASSSCQ', ExtendedIUPACProtein())
ID: exon_1124
Number of features: 0
Seq('VPWTAELQLHGKGQLQTLSQKCKREASGTQSERFG', ExtendedIUPACProtein())
ID: exon_1127
Number of features: 0
Seq('EQLLAWCSP', ExtendedIUPACProtein())
ID: exon_1130
Number of features: 0
Seq('TNAWRLSE', ExtendedIUPACProtein())
ID: exon_1133
Number of features: 0
Seq('

ID: exon_1679
Number of features: 0
Seq('KASRPSC', ExtendedIUPACProtein())
ID: exon_1682
Number of features: 0
Seq('VSSLCKLL', ExtendedIUPACProtein())
ID: exon_1685
Number of features: 0
Seq('TSSHYIIEAKGNQRWSR', ExtendedIUPACProtein())
ID: exon_1688
Number of features: 0
Seq('SVHVNVLCGRVKGPN', ExtendedIUPACProtein())
ID: exon_1691
Number of features: 0
Seq('QFWGQKPWSQHYHQGYY', ExtendedIUPACProtein())
ID: exon_1694
Number of features: 0
Seq('QFWGQKPWSQHYHQGYY', ExtendedIUPACProtein())
ID: exon_1697
Number of features: 0
Seq('TNVSAAAQRRKMCLFAGFQRKAVVVCPKDEDYKQRTQKKAEVEGKDLPEHAVLKMK', ExtendedIUPACProtein())
ID: exon_1700
Number of features: 0
Seq('TT', ExtendedIUPACProtein())
ID: exon_1703
Number of features: 0
Seq('', ExtendedIUPACProtein())
ID: exon_1706
Number of features: 0
Seq('VHFFRLCPLC', ExtendedIUPACProtein())
ID: exon_1709
Number of features: 0
Seq('PRNTLLSKGAL', ExtendedIUPACProtein())
ID: exon_1712
Number of features: 0
Seq('VHFFRLCPLC', ExtendedIUPACProtein())
ID: exon_1715


ID: exon_2225
Number of features: 0
Seq('VKS', ExtendedIUPACProtein())
ID: exon_2228
Number of features: 0
Seq('LQELHRAGGDLMHRDEQSRTLLHHAVSTGSKDVVRYLLDH', ExtendedIUPACProtein())
ID: exon_2231
Number of features: 0
Seq('GDGSQ', ExtendedIUPACProtein())
ID: exon_2234
Number of features: 0
Seq('ILGSWNQSRERIIFVFLKLWFPIKVT', ExtendedIUPACProtein())
ID: exon_2237
Number of features: 0
Seq('AGPGPRPGQLGHSQS', ExtendedIUPACProtein())
ID: exon_2240
Number of features: 0
Seq('VLAEDVTQTASGWGWLAHRSCHHAAASSGPSTSISWAELPGHLSS', ExtendedIUPACProtein())
ID: exon_2243
Number of features: 0
Seq('AAWHPRYNLIVVGRYPDPNFK', ExtendedIUPACProtein())
ID: exon_2246
Number of features: 0
Seq('DSALVMTRAPGRDVLVAGGPCA', ExtendedIUPACProtein())
ID: exon_2249
Number of features: 0
Seq('DSALVMTRAPGRDVLVAGGPCA', ExtendedIUPACProtein())
ID: exon_2252
Number of features: 0
Seq('VMLLETSRRYNPGSESITFLKDFSYNREDFAKA', ExtendedIUPACProtein())
ID: exon_2255
Number of features: 0
Seq('IGLLTTMAILLHEIPHE', ExtendedIUPACProtein())
ID:

ID: exon_2813
Number of features: 0
Seq('LESRSPHL', ExtendedIUPACProtein())
ID: exon_2816
Number of features: 0
Seq('NTNLLVLKTQSLGNTA', ExtendedIUPACProtein())
ID: exon_2819
Number of features: 0
Seq('KTVTTASMITTKTLPLVLKAATATMPASVVGQRPTIAMVTAINSQKAVLSTDVQ...VTL', ExtendedIUPACProtein())
ID: exon_2822
Number of features: 0
Seq('ASPLRWKSHVCPRPQAQCPP', ExtendedIUPACProtein())
ID: exon_2825
Number of features: 0
Seq('AQLALKTRLLPSVGVSLRTEDKFVLWFPVFYFGV', ExtendedIUPACProtein())
ID: exon_2828
Number of features: 0
Seq('GENPLQHSWDIYLIVPYIVKLPY', ExtendedIUPACProtein())
ID: exon_2831
Number of features: 0
Seq('TRIADWREGALNGNYLKRKLQDAAEQLKQYEINATPKGWSCHWD', ExtendedIUPACProtein())
ID: exon_2834
Number of features: 0
Seq('MATGGQQKENTLLHLFAGGCGGTVGAIFTCPLEVIKTRLQSLRLALRTVYYPQV...QVL', ExtendedIUPACProtein())
ID: exon_2837
Number of features: 0
Seq('VHSLA', ExtendedIUPACProtein())
ID: exon_2840
Number of features: 0
Seq('ICFCFLHLSLPKACWPLCRAGEWLGLSEP', ExtendedIUPACProtein())
ID: exon_2843
Number 

ID: exon_3212
Number of features: 0
Seq('WFEPSQFGTWWAILSTSMNLAGGLGPILATILAQSYSWRSTLALSGALW', ExtendedIUPACProtein())
ID: exon_3215
Number of features: 0
Seq('GSSPAASRQLMLSASLSVGCCLTR', ExtendedIUPACProtein())
ID: exon_3218
Number of features: 0
Seq('DTGSPWSLPRLNDCPEGGYKGSLGGWVEEGAEKTQLSRSGNCESGKQELWSEAVRLGW', ExtendedIUPACProtein())
ID: exon_3221
Number of features: 0
Seq('RASVLVFEPSCFTGLPR', ExtendedIUPACProtein())
ID: exon_3224
Number of features: 0
Seq('VFNFLYSLPQLLHIIPCPRHRIP', ExtendedIUPACProtein())
ID: exon_3227
Number of features: 0
Seq('FHHFGIALPQL', ExtendedIUPACProtein())
ID: exon_3230
Number of features: 0
Seq('EQCKAFPHHE', ExtendedIUPACProtein())
ID: exon_3233
Number of features: 0
Seq('GEKYIDLRH', ExtendedIUPACProtein())
ID: exon_3236
Number of features: 0
Seq('GEKYIDLRH', ExtendedIUPACProtein())
ID: exon_3239
Number of features: 0
Seq('HATPVS', ExtendedIUPACProtein())
ID: exon_3242
Number of features: 0
Seq('SYQSQQHLH', ExtendedIUPACProtein())
ID: exon_3245
Number of fea

ID: exon_3791
Number of features: 0
Seq('VDPLSEEGWKQKPNLNAIRSLEAVIRVH', ExtendedIUPACProtein())
ID: exon_3794
Number of features: 0
Seq('QNERLKL', ExtendedIUPACProtein())
ID: exon_3797
Number of features: 0
Seq('GTSLGQVSFSKLGSFGAVIQVVLILYPSGNPSLPLFSKPVSWDSRPSTSWTLSPLGL', ExtendedIUPACProtein())
ID: exon_3800
Number of features: 0
Seq('ASGTLFFSSPTCPSSSSCPLHISSLSLRALLAPE', ExtendedIUPACProtein())
ID: exon_3803
Number of features: 0
Seq('ADQCTGLQGFL', ExtendedIUPACProtein())
ID: exon_3806
Number of features: 0
Seq('NLLSAFKLTWSESLDEPVGPFLLARWFGTSGFGTVRGWTK', ExtendedIUPACProtein())
ID: exon_3809
Number of features: 0
Seq('IPVKTQAPPLLSSWPRTGPSPILGQPAAPRTLSCAHPDNWRGCPGPPW', ExtendedIUPACProtein())
ID: exon_3812
Number of features: 0
Seq('IPVKTQAPPLLSSWPRTGPSPILGQPAAPRTLSCAHPDNWRGCPGPP', ExtendedIUPACProtein())
ID: exon_3815
Number of features: 0
Seq('CHLGLTAQPELYLLNTMDADSLVSR', ExtendedIUPACProtein())
ID: exon_3818
Number of features: 0
Seq('FQLHVYFLFNILNILIRKVTYYKSRKIHKYKCQNLSFVFFLE', Exten

ID: exon_4322
Number of features: 0
Seq('YFSRPPKQTTFW', ExtendedIUPACProtein())
ID: exon_4325
Number of features: 0
Seq('DLL', ExtendedIUPACProtein())
ID: exon_4328
Number of features: 0
Seq('HTGAGAGLRSYEETRTRTRDSASAQPPGAGRGPIPDTAVARLYLGAVSAGRLETSMWS', ExtendedIUPACProtein())
ID: exon_4331
Number of features: 0
Seq('EQRPALRDWIAHLCNRCVLIL', ExtendedIUPACProtein())
ID: exon_4334
Number of features: 0
Seq('AVPCPPIKMDVLWEARVGGSQGQDFEISLADMVKPHLY', ExtendedIUPACProtein())
ID: exon_4337
Number of features: 0
Seq('LTEQTPALLGNMAMATSLMDIGDSFGHPACPLVSRSRNSPVEDDDDDDDVVFIE...RSK', ExtendedIUPACProtein())
ID: exon_4340
Number of features: 0
Seq('VYENIIDWYFQFSFARK', ExtendedIUPACProtein())
ID: exon_4343
Number of features: 0
Seq('MCQMGFMLTWKKCRRKERT', ExtendedIUPACProtein())
ID: exon_4346
Number of features: 0
Seq('CCQVYQLE', ExtendedIUPACProtein())
ID: exon_4349
Number of features: 0
Seq('F', ExtendedIUPACProtein())
ID: exon_4352
Number of features: 0
Seq('GSEHRSWLLLPLWSKVLCKEGVF', ExtendedIUPACPro

ID: exon_4871
Number of features: 0
Seq('DGGAFAEASGPHRRDSRGQDSGPGAGGDPKGVCPHPGLLPRRGPSAQAPAAGQT...LWT', ExtendedIUPACProtein())
ID: exon_4874
Number of features: 0
Seq('RSTNPCSAPYQEALGGRGAELRQKP', ExtendedIUPACProtein())
ID: exon_4877
Number of features: 0
Seq('SRAVGWARNLETHFGK', ExtendedIUPACProtein())
ID: exon_4880
Number of features: 0
Seq('RKRWGAEERS', ExtendedIUPACProtein())
ID: exon_4883
Number of features: 0
Seq('SAGGQRSGVEAEA', ExtendedIUPACProtein())
ID: exon_4886
Number of features: 0
Seq('GAGGLPASRVSVVAEP', ExtendedIUPACProtein())
ID: exon_4889
Number of features: 0
Seq('MDYRALVHERDEAAYGELRAMVLDLRAFY', ExtendedIUPACProtein())
ID: exon_4892
Number of features: 0
Seq('CVLRLCAAPSLP', ExtendedIUPACProtein())
ID: exon_4895
Number of features: 0
Seq('ELLGLLHGPPETIIITS', ExtendedIUPACProtein())
ID: exon_4898
Number of features: 0
Seq('GGRALKENPVDLPATEQK', ExtendedIUPACProtein())
ID: exon_4901
Number of features: 0
Seq('DPPPRT', ExtendedIUPACProtein())
ID: exon_4904
Number of featur

ID: exon_5426
Number of features: 0
Seq('ARQHPQPLA', ExtendedIUPACProtein())
ID: exon_5429
Number of features: 0
Seq('EACAGPGRDPGSGHRGRPAGQRLVLATDQPQQSA', ExtendedIUPACProtein())
ID: exon_5432
Number of features: 0
Seq('PLLPQQPVFVVQE', ExtendedIUPACProtein())
ID: exon_5435
Number of features: 0
Seq('PLLPQQPVFVVQE', ExtendedIUPACProtein())
ID: exon_5438
Number of features: 0
Seq('LPWKRIEVMALPKPGGAHSL', ExtendedIUPACProtein())
ID: exon_5441
Number of features: 0
Seq('LPWKRIEVMALPKPGGAHSL', ExtendedIUPACProtein())
ID: exon_5444
Number of features: 0
Seq('TSVPMAIHCSALQPQPCVLGSQVLRASSSSTHCPGSGSK', ExtendedIUPACProtein())
ID: exon_5447
Number of features: 0
Seq('LICRGSEKAAGDRAQARSRGREVWRREVQDLGWSRKLEGGRNP', ExtendedIUPACProtein())
ID: exon_5450
Number of features: 0
Seq('VLKTVANNRDKGRANHSAFLFGFGDGGGGPTQTMLDRLKRLSNTDGLP', ExtendedIUPACProtein())
ID: exon_5453
Number of features: 0
Seq('VLKTVANNRDKGRANHSAFLFGFGDGGGGPTQTMLDRLKRLSNTDGLP', ExtendedIUPACProtein())
ID: exon_5456
Number of features:

ID: exon_5975
Number of features: 0
Seq('GFSPSVCPLGFASYSAPGLASVEGSCSR', ExtendedIUPACProtein())
ID: exon_5978
Number of features: 0
Seq('A', ExtendedIUPACProtein())
ID: exon_5981
Number of features: 0
Seq('GIGTDVYLRLRHIDGREESFRI', ExtendedIUPACProtein())
ID: exon_5984
Number of features: 0
Seq('RK', ExtendedIUPACProtein())
ID: exon_5987
Number of features: 0
Seq('ALGTKDHVMTP', ExtendedIUPACProtein())
ID: exon_5990
Number of features: 0
Seq('RK', ExtendedIUPACProtein())
ID: exon_5993
Number of features: 0
Seq('ALGTKDHVMTP', ExtendedIUPACProtein())
ID: exon_5996
Number of features: 0
Seq('VTSKAL', ExtendedIUPACProtein())
ID: exon_5999
Number of features: 0
Seq('ALGLVPLLAGQLCGEAGEEEEEGREAASWDFPEERTPERAQGGSFRSL', ExtendedIUPACProtein())
ID: exon_6002
Number of features: 0
Seq('SLPMRAWPTRCSCPKEK', ExtendedIUPACProtein())
ID: exon_6005
Number of features: 0
Seq('VIPHYTSEKS', ExtendedIUPACProtein())
ID: exon_6008
Number of features: 0
Seq('TNMK', ExtendedIUPACProtein())
ID: exon_6011
Number o

ID: exon_6563
Number of features: 0
Seq('MASELAMSNSDLPTSPLAMEYVNDFDLMKFEVKKEPVETDRIISQCGRLIAGGS...EFF', ExtendedIUPACProtein())
ID: exon_6566
Number of features: 0
Seq('GIVILMQKLVDWLKVWTSVAFFS', ExtendedIUPACProtein())
ID: exon_6569
Number of features: 0
Seq('MVTRGCCLPAALLGPDNHPANQTQPWMNLWPKHAVDPTVGAN', ExtendedIUPACProtein())
ID: exon_6572
Number of features: 0
Seq('GPPGCGLLLFRLGAEASCQKGERVLLTQYLGHSSPKCLPPTLHLVCTQ', ExtendedIUPACProtein())
ID: exon_6575
Number of features: 0
Seq('PSPARGAGHLQPLGSRLPVEP', ExtendedIUPACProtein())
ID: exon_6578
Number of features: 0
Seq('ISKRSF', ExtendedIUPACProtein())
ID: exon_6581
Number of features: 0
Seq('VSLGLPQQPTAPLLYPRGPHPGHVGGEPLGRAAARGAGAAVGAAASG', ExtendedIUPACProtein())
ID: exon_6584
Number of features: 0
Seq('FQRARLW', ExtendedIUPACProtein())
ID: exon_6587
Number of features: 0
Seq('EIERLKSEKPTWERRLRWEGMKSVFGGPPSLLWMNPFVGFRFRRLPTRPRKGGPEFSV', ExtendedIUPACProtein())
ID: exon_6590
Number of features: 0
Seq('VLLG', ExtendedIUPACProtein())
ID: 

ID: exon_7127
Number of features: 0
Seq('VVPPARSTAGDPTVPGSLFRQLVSEEDNTSAPSLF', ExtendedIUPACProtein())
ID: exon_7130
Number of features: 0
Seq('CLWSRGANPSLLPGSPRRSHSSQIVFSGWWLGSAYVSLEIPATSRDFPGAQAWI...LLY', ExtendedIUPACProtein())
ID: exon_7133
Number of features: 0
Seq('AGLEFLSSSNLPASASQSTGITRRRHHAQARCVLSQSFGRHAAPQAISSSS', ExtendedIUPACProtein())
ID: exon_7136
Number of features: 0
Seq('VIFSAEQTYELMRCLED', ExtendedIUPACProtein())
ID: exon_7139
Number of features: 0
Seq('QNRLMS', ExtendedIUPACProtein())
ID: exon_7142
Number of features: 0
Seq('NRLMS', ExtendedIUPACProtein())
ID: exon_7145
Number of features: 0
Seq('VIFSAEQTYELMRCLEDLTSRRPVHGESDTEQLQ', ExtendedIUPACProtein())
ID: exon_7148
Number of features: 0
Seq('VGADLWIYKGSSFSIRKDLLENSHAELV', ExtendedIUPACProtein())
ID: exon_7151
Number of features: 0
Seq('VGADLWIYKGSSFSIRKDLLENSHAELV', ExtendedIUPACProtein())
ID: exon_7154
Number of features: 0
Seq('AQSRCCPMAPLMW', ExtendedIUPACProtein())
ID: exon_7157
Number of features: 0
Seq('GR

TypeError: SeqRecord (id=exon_7569) has an invalid sequence.

In [None]:
def translate