In [2]:
import sys
import gzip
import argparse
import pickle
from statistics import mean, median
import numpy as np
import pandas as pd
from Bio.Seq import Seq
import pyfastx



chrom = 'chr1'
strand = '+'
junc = [(1490671, 1495485), (1478745, 1479049), (1489274, 1489692), (1477350, 1478644), (1486235, 1486544), (1486668, 1489204), (1485838, 1486110), (1482614, 1485016), (1479108, 1480867), (1455519, 1523839), (1486668, 1487863), (1478351, 1478644), (1462708, 1477274), (1480908, 1482266), (1477350, 1477751), (1485171, 1485782), (1487914, 1489204), (1472089, 1477274), (1490424, 1490563), (1489274, 1490257), (1489811, 1490257), (1482303, 1482545), (1477350, 1480867), (1472306, 1477274), (1480936, 1482138)]
start_codons = {(1471885, 1471887)}
stop_codons = {(1495815, 1495817)}
gene_name = 'ATAD3B'
exonLcutoff=1000
verbose=True



In [15]:


'''
Compute whether there is a possible combination that uses the junction without
inducing a PTC. We start with all annotated stop codon and go backwards.
'''

fa = pyfastx.Fasta('genome.fa')

seed = []

junc.sort()
if strand == "+":
    junc.reverse()
    
"""Quinn Comment: Adds all 'stop codons' to a nested list called seed""" 
##in an individual transcript    
for c in stop_codons:
    if strand == "+":
        seed.append([c[1]])
    else:
        seed.append([c[0]])

# seed starts with just stop codon and then a possible 3'ss-5'ss junction
# without introducing a PTC [stop_codon,3'ss, 5'ss, 3'ss, ..., start_codon]

junc_pass = {}
junc_fail = {}
path_pass = []
proteins = []

dic_terminus = {}

depth = 0


#seed = [[25832088, 25832158, 25835022, 25835846, 25837413], [25832088, 25832158, 25835022, 25835846, 25859280]]


"""Quinn Comment: while our seed length is greater than 0 - which means we have charted all possible paths through 
all junctions ending in a stop codon (or there is an exon longer than 1000 bp and we have no complete paths)"""
while len(seed) > 0:

    new_seed = []
    final_check = []
    depth += 1
    if verbose:
        sys.stdout.write("Depth %s, Seed L = %s\n"%(depth, len(seed)))
    #print(start_codons, [s[-1] for s in seed][-10:], len(junc))
    framepos = {}
            
    for s in seed:
        # first check that the seed paths are good        
        bool_ptc = False
        leftover = ''
        if len(s) > 0:            
            leftover = Seq("")
            allprot = Seq("")

            """Quinn Comment: loop through the exons, calculating lengths"""
            for i in range(0, len(s)-1, 2):
                exon_coord = s[i:i+2]
                exon_coord.sort()
                exon_coord = tuple(exon_coord)
                exlen = exon_coord[1]-exon_coord[0]


                """Quinn Comment: find start position relative to named start of this exon and translate to protein"""
                startpos = (len(leftover)+exlen+1)%3
                if strand == '+':
                    seq = Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))+leftover 
                    prot = seq[startpos:].translate()
                    leftover = seq[:startpos]                                                                                                               
                    allprot = prot+allprot  
                else:
                    seq = leftover+Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))
                    aseq = seq
                    if startpos > 0:
                        leftover = seq[-startpos:]
                    else:
                        leftover = Seq("")
                    seq = seq.reverse_complement()
                    prot = seq[startpos:].translate()
                    allprot = prot+allprot

                #found a PTC in this transcript if any element but the last is a stop codon   
                bool_ptc = "*" in allprot[:-1]
        """Quinn Comment: if we found a PTC, add all intron coordinate pairs involved in the transcript to junc_fail""" 
               
        if bool_ptc:
            #This transcript failed
            for i in range(1, len(s)-1, 2):                                                                                                                  
                j_coord = s[i:i+2]                                                                                                                           
                j_coord.sort()                                                                                                                             
                j_coord = tuple(j_coord)                                                                                                                     
                if j_coord not in junc_fail:                                                                                                                 
                    junc_fail[j_coord] = 0                                                                                                                   
                junc_fail[j_coord] += 1  
            continue
    
        # passed
        """Quinn Comment: if we don't just have a stop codon, create a terminus for this 
        seed at the last 3' splice site or start codon; terminus is last two coordinates and the reading frame, 
        used for dynamic programming later"""
        if len(s) > 2:
            terminus = (s[-2],s[-1],leftover)
            

            ###THIS LINE IS KEY FOR DYNAMIC PROGRAMMING BEHAVIOR
            if terminus in dic_terminus:
                dic_terminus[terminus].append(tuple(s))
                continue
            else:
                dic_terminus[terminus] = [tuple(s)]
        
        last_pos = s[-1]
        """Quinn Comment: check the last position of our seed to see if it is close to a start codon, within a potential exon's length,
            and add your seed plus this start codon to final_check """
        for start in start_codons:                
            #print("start", start, abs(last_pos-start[0]))
            if strand == "+" and last_pos > start[0] and abs(last_pos-start[0]) < exonLcutoff:
                final_check.append(s+[start[0]])
            elif strand == "-" and last_pos < start[1] and abs(last_pos-start[1]) < exonLcutoff:
                final_check.append(s+[start[1]]) 

        """Quinn Comment: add all possible places to go from our last_pos to the seed (nested list)"""
        for j0,j1 in junc:                
            if strand == "+" and last_pos > j1 and abs(last_pos-j1) < exonLcutoff:
                new_seed.append(s+[j1,j0])
            #print("junction", (j0,j1), abs(last_pos-j0))
            if strand == "-" and last_pos < j0 and abs(last_pos-j0) < exonLcutoff: 
                new_seed.append(s+[j0,j1])
                
    """Quinn Comment: Exited from s in seed loop, now we check our final_checks of the full paths, we do not
    eliminate paths based on presence of a PTC, rather we classify full complete paths without PTCs if they exist"""
    # check that the possible final paths are good
    for s in final_check:

        leftover = Seq("")
        allprot = Seq("")
        for i in range(0, len(s)-1, 2):
            exon_coord = s[i:i+2]
            exon_coord.sort()
            exon_coord = tuple(exon_coord)
            exlen = exon_coord[1]-exon_coord[0]
            startpos = (len(leftover)+exlen+1)%3
            if strand == "+":
                seq = Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))+leftover
                leftover = seq[:startpos]  
                prot = seq[startpos:].translate()
                allprot = prot+allprot
            else:
                seq = leftover+Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))
                if startpos > 0:                                                                                                    
                    leftover = seq[-startpos:]                                    
                else:
                    leftover = Seq("")
                seq = seq.reverse_complement()                                                                                                           
                prot = seq[startpos:].translate()                                                                                                        
                allprot = prot+allprot                      
        bool_ptc = "*" in allprot[:-1]
        """Quinn Comment: Classify seed + start codon as a passing path if no PTCs found in previous block of code"""
        if not bool_ptc:
            print(allprot)
            # all pass
            proteins.append("\t".join([gene_name,chrom,strand, "-".join([str(x) for x in s]), str(allprot)])+'\n')
            #print("ALL PASS %s"%(s))
            path_pass.append(tuple(s))
            for i in range(1, len(s), 2):
                j_coord = s[i:i+2]
                j_coord.sort()
                j_coord = tuple(j_coord)
                if j_coord not in junc_pass:
                    junc_pass[j_coord] = 0
                junc_pass[j_coord] += 1

    seed = new_seed



"""Quinn Comment: OUT OF WHILE LOOP through all possible paths/seeds; 
check all junctions that end in termini that eventually pass 
to see if they are part of a full path that has been classified as passing"""
while True:
    new_paths = []
    for terminus in dic_terminus:
        terminus_pass = False
        for path_subset in dic_terminus[terminus]:
            for path in path_pass:
                if path[:len(path_subset)] == path_subset:
                    terminus_pass = True
                    break
        #print(terminus, terminus_pass)

        """Quinn Comment: add all junctions from a passing terminus to junc_pass"""
        if terminus_pass:
            for path_subset in dic_terminus[terminus]:
                if path_subset in path_pass: continue
                new_paths.append(path_subset)
                path_pass.append(path_subset)
                for i in range(1, len(path_subset), 2):
                    j_coord = list(path_subset[i:i+2])
                    j_coord.sort()
                    j_coord = tuple(j_coord)
                    if j_coord not in junc_pass:
                        junc_pass[j_coord] = 0
                        if verbose:
                            sys.stdout.write("junction pass" + str(j_coord))
    """Quinn Comment: we could have a new path_pass added, so our while loop checks again to see if there are any new paths 
    that are now going to be passing considering our additions"""
    if len(new_paths) == 0:
        break
        
#junc_pass,junc_fail,proteins

Depth 1, Seed L = 1
Depth 2, Seed L = 1
Depth 3, Seed L = 4
Depth 4, Seed L = 3
Depth 5, Seed L = 5
Depth 6, Seed L = 4
Depth 7, Seed L = 3
Depth 8, Seed L = 4
Depth 9, Seed L = 2
Depth 10, Seed L = 1
Depth 11, Seed L = 3
Depth 12, Seed L = 2
Depth 13, Seed L = 4
Depth 14, Seed L = 6
CRGSSALTRAPRVKARGRRRLCRPRSPGPRAAGTAVWETGRRPRTNGATSTPPAWSAPPRRRASWSTRVSAAGRGGAGGRAGRAGEAGALALAAPRCCRQPLPGRDCAPGAPPAGAVSRAGRIGLFPSPVCTSAAVRSGSGYAKEALNLAQMQEQTLQLEQQSKLKQLLNEENLRKQEESVQKQEAMRRATVEREMELRHKNEMLRVETEARARAKAERENADIIREQIRLKASEHRQTVLESIRTAGTLFGEGFRAFVTDRDKVTATVAGLTLLAVGVYSAKNATAVTGRFIEARLGKPSLVRETSRITVLEALRHPIQVSRRLLSRPQDVLEGVVLSPSLEARVRDIAIATRNTKKNRGLYRHILLYGPPGTGKTLFAKKLALHSGMDYAIMTGGDVAPMGREGVTAMHKLFDWANTSRRGLLLFMDEADAFLRKRATEEISKDLRATLNAFLYHMGQHSNKFMLVLASNLPEQFDCAINSRIDVMVHFDLPQQEERERLVRLHFDNCVLKPATEGKRRLKLAQFDYGRKCSEVARLTEGMSGREIAQLAVSWQATAYASKDGVLTEAMMDACVQDAVQQYRQKMRWLKAEGPGRGVEHPLSGVQGETLTSWSLATDPSYPCLAGPCTFRICSWMGTGLCPGPLSPRMSCGGGRPFCPPGHPLL*
MSWLFGVNKGPKGEGAGPPPPLPPAQPGAEGGGDRGLGDRPAPKDKW

In [16]:
junc_pass

{(1490671, 1495485): 2,
 (1490424, 1490563): 2,
 (1489274, 1490257): 2,
 (1487914, 1489204): 2,
 (1486668, 1487863): 2,
 (1486235, 1486544): 2,
 (1485838, 1486110): 2,
 (1485171, 1485782): 2,
 (1482614, 1485016): 2,
 (1482303, 1482545): 2,
 (1480936, 1482138): 2,
 (1477350, 1480867): 2,
 (1472306, 1477274): 1,
 (1471885,): 2,
 (1472089, 1477274): 1,
 (1480908, 1482266): 0,
 (1479108, 1480867): 0,
 (1478745, 1479049): 0,
 (1477350, 1478644): 0}

In [13]:
junc.sort()
if strand == "+":
    junc.reverse()
    
"""Quinn Comment: Adds all 'stop codons' to a nested list called seed""" 
##in an individual transcript    
for c in stop_codons:
    if strand == "+":
        seed.append([c[1]])
    else:
        seed.append([c[0]])

# seed starts with just stop codon and then a possible 3'ss-5'ss junction
# without introducing a PTC [stop_codon,3'ss, 5'ss, 3'ss, ..., start_codon]

junc_pass = {'long_exon':{}, 'long_exon':{}}
junc_fail = {}
path_pass = {'long_exon':[], 'long_exon':[]}
proteins = []

dic_terminus = {'normal': {}, 'long_exon': {}}

depth = 0

"""Quinn Comment: while our seed length is greater than 0 - which means we have charted all possible paths through 
all junctions ending in a stop codon (or there is an exon longer than 1000 bp and we have no complete paths)"""
while len(seed) > 0:
    new_seed = []
    final_check = []
    depth += 1
    if verbose:
        sys.stdout.write("Depth %s, Seed L = %s\n"%(depth, len(seed)))
    #print(start_codons, [s[-1] for s in seed][-10:], len(junc))
    framepos = {}
                
    for s in seed:
        # first check that the seed paths are good        
        bool_ptc = False
        leftover = ''
        if len(s) > 0:                
            leftover = Seq("")
            allprot = Seq("")

            """Quinn Comment: loop through the exons, calculating lengths"""
            for i in range(0, len(s)-1, 2):
                exon_coord = s[i:i+2]
                exon_coord.sort()
                exon_coord = tuple(exon_coord)
                exlen = exon_coord[1]-exon_coord[0]


                """Quinn Comment: find start position relative to named start of this exon and translate to protein"""
                startpos = (len(leftover)+exlen+1)%3
                if strand == '+':
                    seq = Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))+leftover 
                    """Quinn Comment: exon length rule"""
                    if exlen + 1 > 407:
                        prot = seq[startpos:].translate(stop_symbol = '@')
                    else:
                        prot = seq[startpos:].translate()
                    leftover = seq[:startpos]                                                                                                               
                    allprot = prot+allprot  
                else:
                    seq = leftover+Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))
                    aseq = seq
                    if startpos > 0:
                        leftover = seq[-startpos:]
                    else:
                        leftover = Seq("")
                    seq = seq.reverse_complement()

                    if exlen + 1 > 407:
                        prot = seq[startpos:].translate(stop_symbol = '@')
                    else:
                        prot = seq[startpos:].translate()
                    
                    allprot = prot+allprot

                #found a PTC in this transcript if any element but the last is a stop codon    
                bool_ptc = "*" in allprot[:-1]
                bool_long_exon = '@' in allprot[:-1]



        """Quinn Comment: if we found a PTC, add all intron coordinate pairs involved in the transcript to junc_fail"""        
        if bool_ptc:
            #This transcript failed
            for i in range(1, len(s)-1, 2):                                                                                                                  
                j_coord = s[i:i+2]                                                                                                                           
                j_coord.sort()                                                                                                                             
                j_coord = tuple(j_coord)                                                                                                                     
                if j_coord not in junc_fail:                                                                                                                 
                    junc_fail[j_coord] = 0                                                                                                                   
                junc_fail[j_coord] += 1  

            continue
    
        # passed
        """Quinn Comment: if we don't just have a stop codon, create a terminus for this 
        seed at the last 3' splice site or start codon; terminus is last two coordinates and the reading frame, 
        used for dynamic programming later"""
        if len(s) > 2:
            terminus = (s[-2],s[-1],leftover)
            
            if not bool_long_exon:
                if terminus in dic_terminus['normal']:
                    dic_terminus['normal'][terminus].append(tuple(s))
                    continue
                else:
                    dic_terminus['normal'][terminus] = [tuple(s)]
            else:
                if terminus in dic_terminus['long_exon']:
                    dic_terminus['long_exon'][terminus].append(tuple(s))
                    continue
                else:
                    dic_terminus['long_exon'][terminus] = [tuple(s)]
        
        last_pos = s[-1]
        
        """Quinn Comment: check the last position of our seed to see if it is close to a start codon, within a potential exon's length,
            and add your seed plus this start codon to final_check """
        for start in start_codons:                
            #print("start", start, abs(last_pos-start[0]))
            if strand == "+" and last_pos > start[0] and abs(last_pos-start[0]) < exonLcutoff:
                final_check.append(s+[start[0]])
            elif strand == "-" and last_pos < start[1] and abs(last_pos-start[1]) < exonLcutoff:
                final_check.append(s+[start[1]]) 

        """Quinn Comment: add all possible places to go from our last_pos to the seed (nested list)"""
        for j0,j1 in junc:                
            if strand == "+" and last_pos > j1 and abs(last_pos-j1) < exonLcutoff:
                new_seed.append(s+[j1,j0])
            #print("junction", (j0,j1), abs(last_pos-j0))
            if strand == "-" and last_pos < j0 and abs(last_pos-j0) < exonLcutoff: 
                new_seed.append(s+[j0,j1])
                
    """Quinn Comment: Exited from s in seed loop, now we check our final_checks of the full paths, we do not
    eliminate paths based on presence of a PTC, rather we classify full complete paths without PTCs if they exist"""
    # check that the possible final paths are good
    for s in final_check:
        leftover = Seq("")
        allprot = Seq("")
        for i in range(0, len(s)-1, 2):
            exon_coord = s[i:i+2]
            exon_coord.sort()
            exon_coord = tuple(exon_coord)
            exlen = exon_coord[1]-exon_coord[0]
            startpos = (len(leftover)+exlen+1)%3
            if strand == "+":
                seq = Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))+leftover
                leftover = seq[:startpos]  
                if exlen + 1 > 407:
                    prot = seq[startpos:].translate(stop_symbol = '@')
                else:
                    prot = seq[startpos:].translate()
                allprot = prot+allprot
            else:
                seq = leftover+Seq(fa.fetch(chrom, (exon_coord[0],exon_coord[1])))
                if startpos > 0:                                                                                                    
                    leftover = seq[-startpos:]                                    
                else:
                    leftover = Seq("")
                seq = seq.reverse_complement()                                                                                                           
                if exlen + 1 > 407:
                    prot = seq[startpos:].translate(stop_symbol = '@')
                else:
                    prot = seq[startpos:].translate()                                                                                                       
                allprot = prot+allprot                    
        bool_ptc = "*" in allprot[:-1]
        bool_long_exon = '@' in allprot[:-1]
    
        """Quinn Comment: Classify seed + start codon as a passing path if no PTCs found in previous block of code"""
        if not bool_ptc:
            # all pass
            proteins.append("\t".join([gene_name,chrom,strand, "-".join([str(x) for x in s]), str(allprot)])+'\n')
            #print("ALL PASS %s"%(s))
            if bool_long_exon:
                print('long_exon:')
                print(allprot)
                path_pass['long_exon'].append(tuple(s))
            else:
                print('normal:')
                print(allprot)
                path_pass['normal'].append(tuple(s))
            for i in range(1, len(s), 2):
                j_coord = s[i:i+2]
                j_coord.sort()
                j_coord = tuple(j_coord)
                if not bool_long_exon: 
                    if j_coord not in junc_pass['normal']:
                        junc_pass['normal'][j_coord] = 0
                    junc_pass['normal'][j_coord] += 1
                else:
                    if j_coord not in junc_pass['long_exon']:
                        junc_pass['long_exon'][j_coord] = 0
                    junc_pass['long_exon'][j_coord] += 1

    seed = new_seed


"""Quinn Comment: OUT OF WHILE LOOP through all possible paths/seeds; 
check all termini to see if they are part of a full path that has been classified as passing"""
while True:
    new_paths = []
    for terminus in dic_terminus['normal']:
        terminus_pass = False
        for path_subset in dic_terminus['normal'][terminus]:
            for path in path_pass['normal']:
                if path[:len(path_subset)] == path_subset:
                    terminus_pass = True
                    break
        #print(terminus, terminus_pass)

        """Quinn Comment: if our terminus is part of a passing path, we want to make sure if is reflected in passing paths and
        add the associate junctions to junc_pass, only if they are not present"""
        if terminus_pass:
            subsets_to_check = dic_terminus['normal'][terminus]
            for path_subset in subsets_to_check:
                if path_subset in path_pass['normal']: continue
                new_paths.append(path_subset)
                path_pass['normal'].append(path_subset)
                for i in range(1, len(path_subset), 2):
                    j_coord = list(path_subset[i:i+2])
                    j_coord.sort()
                    j_coord = tuple(j_coord)
                    if j_coord not in junc_pass['normal']:
                        junc_pass['normal'][j_coord] = 0
                        if verbose:
                            sys.stdout.write("junction pass:" + str(j_coord))

    """Quinn Comment: we could have a new path_pass added, so our while loop checks again to see if there are any new paths 
    that are now going to be passing considering our additions"""
    if len(new_paths) == 0:
        break


while True:
    new_paths = []
    for terminus in dic_terminus['long_exon']:
        terminus_pass = False
        for path_subset in dic_terminus['long_exon'][terminus]:
            for path in path_pass['long_exon']:
                if path[:len(path_subset)] == path_subset:
                    terminus_pass = True
                    break
        #print(terminus, terminus_pass)

        """Quinn Comment: if our terminus is part of a passing path, we want to make sure if is reflected in passing paths and
        add the associate junctions to junc_pass, only if they are not present"""
        if terminus_pass:
            subsets_to_check = dic_terminus['long_exon'][terminus]
            for path_subset in subsets_to_check:
                if path_subset in path_pass['long_exon']: continue
                new_paths.append(path_subset)
                path_pass['long_exon'].append(path_subset)
                for i in range(1, len(path_subset), 2):
                    j_coord = list(path_subset[i:i+2])
                    j_coord.sort()
                    j_coord = tuple(j_coord)
                    if j_coord not in junc_pass['long_exon']:
                        junc_pass['long_exon'][j_coord] = 0
                        if verbose:
                            sys.stdout.write("junction long_exon:" + str(j_coord))

    """Quinn Comment: we could have a new path_pass added, so our while loop checks again to see if there are any new paths 
    that are now going to be passing considering our additions"""
    if len(new_paths) == 0:
        break
        


Depth 1, Seed L = 1
Depth 2, Seed L = 1
Depth 3, Seed L = 4
Depth 4, Seed L = 10
Depth 5, Seed L = 9
Depth 6, Seed L = 10
Depth 7, Seed L = 12
Depth 8, Seed L = 13
Depth 9, Seed L = 17
long_exon:
CRGSSALTRAPRVKARGRRRLCRPRSPGPRAAGTAVWETGRRPRTNGATSTPPAWSAPPRRRASWSTRVSAAGRGGAGGRAGRAGEAGALALAAPRCCRQPLPGRDCAPGAPPAGAVSRAGRIGLFPSPVCTSAAVRSGSGYAKEALNLAQMQEQTLQLEQQSKLKQLLNEENLRKQEESVQKQEAMRRATVEREMELRHKNEMLRVETEARARAKAERENADIIREQIRLKASEHRQTVLESIR@ALPRPGPATDGAPQV@VAGPRALSSSSRPGRHRLTPWWGHCPSVLARPCRHVRASPSTCSRCVVRIFVSFLVTPLLSPQDGWHLVWGRIPCLCDRPGQSDSHGGWADAAGCRGLLSQECDSRHWPLHRGSAGEAVPSEGDVPHHGAGGAAAPHPGSGAGLALPECSSWLSPFCPTSTAHAHPPVPSLSPDNRHPHAASRVGFPVWRCTLGVCISETLPLSASVSLAQGS@WGLGAHRGPCKTRDLGVRPSVGEAATGHGVWWPPWGAAPLASP@GACSPQVTG@VVKKIKANKEPENAPNPSNSLLVSRRGRVPAPGRSWLCFGAAPFLCVTEHVCALVAVPWLWQVTQWCFPFPSGRSAGGSSVDPRTCWRVLCLVPAWKHGCATSP@QPGTPRRTGACTGTSCCMGHQAPGRRCLPR@ERLAEQVGQGPLGSHLPAGVWGPQPPGEWTPLRPLPTLV@AQGAGVGSSASHLPGGGRLLSGRLWLPDRDTRQGLHTPGGVCRLCRGRGNICSVSPHSSCPETRPALRHGLRHHDRRGRGPHGAGRRDRHAQAL@LGQ

In [14]:
junc_pass

{'normal': {(1490671, 1495485): 2,
  (1490424, 1490563): 2,
  (1489274, 1490257): 2,
  (1487914, 1489204): 2,
  (1486668, 1487863): 2,
  (1486235, 1486544): 2,
  (1485838, 1486110): 2,
  (1485171, 1485782): 2,
  (1482614, 1485016): 2,
  (1482303, 1482545): 2,
  (1480936, 1482138): 2,
  (1477350, 1480867): 2,
  (1472306, 1477274): 1,
  (1471885,): 2,
  (1472089, 1477274): 1},
 'long_exon': {(1490671, 1495485): 3,
  (1489811, 1490257): 3,
  (1486668, 1489204): 3,
  (1485838, 1486110): 3,
  (1482614, 1485016): 3,
  (1480936, 1482138): 3,
  (1477350, 1480867): 2,
  (1472306, 1477274): 2,
  (1471885,): 3,
  (1472089, 1477274): 1,
  (1479108, 1480867): 1,
  (1478745, 1479049): 1,
  (1477350, 1477751): 1,
  (1489274, 1490257): 0,
  (1490424, 1490563): 0,
  (1485171, 1485782): 0,
  (1487914, 1489204): 0,
  (1486668, 1487863): 0,
  (1482303, 1482545): 0,
  (1480908, 1482266): 0,
  (1477350, 1478644): 0,
  (1478351, 1478644): 0,
  (1486235, 1486544): 0}}

In [8]:
path_pass

{'normal': [],
 'long_exon': [(1495817,
   1495485,
   1490671,
   1490257,
   1489811,
   1489204,
   1486668,
   1486110,
   1485838,
   1485016,
   1482614,
   1482138,
   1480936,
   1480867,
   1477350,
   1477274,
   1472306,
   1471885),
  (1495817,
   1495485,
   1490671,
   1490257,
   1489811,
   1489204,
   1486668,
   1486110,
   1485838,
   1485016,
   1482614,
   1482138,
   1480936,
   1480867,
   1477350,
   1477274,
   1472089,
   1471885),
  (1495817,
   1495485,
   1490671,
   1490257,
   1489811,
   1489204,
   1486668,
   1486110,
   1485838,
   1485016,
   1482614,
   1482138,
   1480936,
   1480867,
   1479108,
   1479049,
   1478745,
   1477751,
   1477350,
   1477274,
   1472306,
   1471885),
  (1495817, 1495485, 1490671),
  (1495817, 1495485, 1490671, 1490257, 1489811),
  (1495817, 1495485, 1490671, 1490563, 1490424, 1490257, 1489811),
  (1495817, 1495485, 1490671, 1490257, 1489811, 1489204, 1486668),
  (1495817, 1495485, 1490671, 1490257, 1489274, 1489204, 14