In [1]:
import pybedtools as pbt
import pysam
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import os.path as op

In [3]:
import splanl.junction_scorer as jn
import splanl.merge_bcs as mbcs
import splanl.coords as coords
import splanl.plots as sp
import splanl.score_motifs as sm
import splanl.inspect_variants as iv
import splanl.post_processing as pp

Using TensorFlow backend.


In [4]:
# Path of the FASTA file handed to pp.get_refseq in the next cell.
fa_file = '/nfs/kitzman2/jacob/proj/jensplice/20220415_wt1_mpsa_trial3/jkp1053_1054_1055.fa'

In [5]:
# Read the reference from the FASTA file.
# NOTE(review): the return type of pp.get_refseq is not visible in this notebook — confirm before indexing into it.
refseq = pp.get_refseq( fa_file )

In [6]:
# Capture the output of `ls` (one entry per matching file) as a list: barcoded BAMs of the BB_test* samples.
# NOTE(review): if the glob matches nothing, the captured list presumably holds ls's error text rather than paths — confirm.
bam1 = ! ls /nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test*/*out.wbcs.bam

In [7]:
# Capture the output of `ls` as a list: barcoded BAMs of the BB_RT* samples.
bam2 = ! ls /nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT*/*out.wbcs.bam

In [8]:
# Show the BB_test* paths to check that the glob matched the expected files.
bam1

['/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_JKLab0340_MM1B/BB_test_Cos1053_JKLab0340_MM1BAligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_JKLab0340_MM2B/BB_test_Cos1053_JKLab0340_MM2BAligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_oligodT_MM1B/BB_test_Cos1053_oligodT_MM1BAligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_oligodT_MM2B/BB_test_Cos1053_oligodT_MM2BAligned.out.wbcs.bam']

In [9]:
# Show the BB_RT* paths to check that the glob matched the expected files.
bam2

['/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1_340_243_Cos1053/BB_RT_1_340_243_Cos1053Aligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1_340_243_Hek1053/BB_RT_1_340_243_Hek1053Aligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1_340_335_Cos1053/BB_RT_1_340_335_Cos1053Aligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1_340_335_Hek1053/BB_RT_1_340_335_Hek1053Aligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1b_340_243_Hek1053/BB_RT_1b_340_243_Hek1053Aligned.out.wbcs.bam',
 '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1b_340_335_Hek1053/BB_RT_1b_340_335_Hek1053Aligned.out.wbcs.bam',
 '/nfs

In [10]:
# All BAM paths in one list: the BB_test* files first, then the BB_RT* files.
bam = bam1 + bam2

In [11]:
# Total number of BAM paths collected by the two globs.
len( bam )

32

In [12]:
# Key each BAM path by its sample name: the file name up to the first '.',
# with 'Aligned' removed.
msamp_fn = {}
for bam_path in bam:
    bam_name = bam_path.split( '/' )[ -1 ]
    samp = bam_name.split( '.' )[ 0 ].replace( 'Aligned', '' )
    msamp_fn[ samp ] = bam_path

In [13]:
# Show the sample name -> BAM path mapping.
msamp_fn

{'BB_test_Cos1053_JKLab0340_MM1B': '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_JKLab0340_MM1B/BB_test_Cos1053_JKLab0340_MM1BAligned.out.wbcs.bam',
 'BB_test_Cos1053_JKLab0340_MM2B': '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_JKLab0340_MM2B/BB_test_Cos1053_JKLab0340_MM2BAligned.out.wbcs.bam',
 'BB_test_Cos1053_oligodT_MM1B': '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_oligodT_MM1B/BB_test_Cos1053_oligodT_MM1BAligned.out.wbcs.bam',
 'BB_test_Cos1053_oligodT_MM2B': '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_test_Cos1053_oligodT_MM2B/BB_test_Cos1053_oligodT_MM2BAligned.out.wbcs.bam',
 'BB_RT_1_340_243_Cos1053': '/nfs/kitzman2/jacob/proj/jensplice/20220513_wt1_mpsa_trial4and3_deep_redoAligns/process_star/BB_RT_1_340_243_Cos1053/BB_RT_1_340_24

In [14]:
# Open every sample's BAM for reading ('rb'); keys are the sample names from msamp_fn.
msamp_rnabam = { samp: pysam.AlignmentFile( bam_fn, 'rb' ) for samp, bam_fn in msamp_fn.items() }

In [15]:
%%time
#I made this more stringent than MSH2 since there is less skipping
#requiring 90 forward and 70 reverse matches
isos_dfs = { samp: jn.get_all_isoforms_pe( msamp_rnabam[ samp ],
                                           [ ( 649, 696 ), ( 3478, 3533 ) ],
                                            spl_tol = 3,
                                            indel_tol = 20,
                                            min_matches_for = 90,
                                            min_matches_rev = 70 )
             for samp in msamp_rnabam }

CPU times: user 32min 36s, sys: 14.1 s, total: 32min 50s
Wall time: 32min 54s


In [16]:
# Preview the first rows of each sample's isoform table.
for samp, iso_tbl in isos_dfs.items():
    print( samp )
    print( iso_tbl.head() )

BB_test_Cos1053_JKLab0340_MM1B
                 read_count
isoform                    
((1267, 1359),)     2398566
((1267, 1350),)     2199618
((1284, 1359),)       17744
((1284, 1350),)       12115
((1268, 1350),)        5905
BB_test_Cos1053_JKLab0340_MM2B
                 read_count
isoform                    
((1267, 1359),)     5042945
((1267, 1350),)     4600777
((1284, 1359),)       39501
((1284, 1350),)       23727
((1268, 1350),)       13542
BB_test_Cos1053_oligodT_MM1B
                 read_count
isoform                    
((1267, 1359),)     2188404
((1267, 1350),)     1959934
((1284, 1359),)       17534
((1284, 1350),)       11526
((1268, 1350),)        5871
BB_test_Cos1053_oligodT_MM2B
                 read_count
isoform                    
((1267, 1359),)     6167843
((1267, 1350),)     5723480
((1284, 1359),)       44962
((1284, 1350),)       30212
((1269, 1350),)       17823
BB_RT_1_340_243_Cos1053
                 read_count
isoform                    
((1267, 1359),) 

In [17]:
%%time
# Combine the per-sample isoform tables into one table. As isogrp_df.head() in the next cell
# shows, rows are indexed by 'isonum' and there is an 'isoform' column plus one
# '<sample>_read_count' column per sample.
isogrp_df = jn.number_and_merge_isoforms( isos_dfs )

BB_test_Cos1053_JKLab0340_MM1B
BB_test_Cos1053_JKLab0340_MM2B
BB_test_Cos1053_oligodT_MM1B
BB_test_Cos1053_oligodT_MM2B
BB_RT_1_340_243_Cos1053
BB_RT_1_340_243_Hek1053
BB_RT_1_340_335_Cos1053
BB_RT_1_340_335_Hek1053
BB_RT_1b_340_243_Hek1053
BB_RT_1b_340_335_Hek1053
BB_RT_1b_ODT_243_Hek1053
BB_RT_1b_ODT_335_Hek1053
BB_RT_1_ODT_243_Cos1053
BB_RT_1_ODT_243_Hek1053
BB_RT_1_ODT_335_Cos1053
BB_RT_1_ODT_335_Hek1053
BB_RT_2_340_243_Cos1054
BB_RT_2_340_243_Hek1054
BB_RT_2_340_335_Cos1054
BB_RT_2_340_335_Hek1054
BB_RT_2_ODT_243_Cos1054
BB_RT_2_ODT_243_Hek1054
BB_RT_2_ODT_335_Cos1054
BB_RT_2_ODT_335_Hek1054
BB_RT_3_340_243_Cos1055
BB_RT_3_340_243_Hek1055
BB_RT_3_340_335_Cos1055
BB_RT_3_340_335_Hek1055
BB_RT_3_ODT_243_Cos1055
BB_RT_3_ODT_243_Hek1055
BB_RT_3_ODT_335_cos1055
BB_RT_3_ODT_335_Hek1055
CPU times: user 15 s, sys: 35 ms, total: 15.1 s
Wall time: 15.1 s


In [18]:
# Preview the merged isoform table.
isogrp_df.head()

Unnamed: 0_level_0,isoform,BB_test_Cos1053_JKLab0340_MM1B_read_count,BB_test_Cos1053_JKLab0340_MM2B_read_count,BB_test_Cos1053_oligodT_MM1B_read_count,BB_test_Cos1053_oligodT_MM2B_read_count,BB_RT_1_340_243_Cos1053_read_count,BB_RT_1_340_243_Hek1053_read_count,BB_RT_1_340_335_Cos1053_read_count,BB_RT_1_340_335_Hek1053_read_count,BB_RT_1b_340_243_Hek1053_read_count,...,BB_RT_2_ODT_335_Cos1054_read_count,BB_RT_2_ODT_335_Hek1054_read_count,BB_RT_3_340_243_Cos1055_read_count,BB_RT_3_340_243_Hek1055_read_count,BB_RT_3_340_335_Cos1055_read_count,BB_RT_3_340_335_Hek1055_read_count,BB_RT_3_ODT_243_Cos1055_read_count,BB_RT_3_ODT_243_Hek1055_read_count,BB_RT_3_ODT_335_cos1055_read_count,BB_RT_3_ODT_335_Hek1055_read_count
isonum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iso0000,"((1136, 1225), (1301, 1350))",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
iso0001,"((1267, 1290), (1320, 1350))",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
iso0002,"((1267, 1314), (1337, 1359))",1,272,0,0,1,127,64,59,140,...,0,0,0,0,0,0,0,0,0,0
iso0003,"((1267, 1282), (1328, 1359))",0,0,2,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
iso0004,"((1136, 1224), (1306, 1359))",0,0,0,0,0,1,0,0,2,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Read counts for the single-exon isoform (1267, 1359), which is the middle exon of 'iso01'
# in the junction graph built later in this notebook.
isogrp_df.loc[ isogrp_df.isoform == ((1267, 1359),) ]

Unnamed: 0_level_0,isoform,BB_test_Cos1053_JKLab0340_MM1B_read_count,BB_test_Cos1053_JKLab0340_MM2B_read_count,BB_test_Cos1053_oligodT_MM1B_read_count,BB_test_Cos1053_oligodT_MM2B_read_count,BB_RT_1_340_243_Cos1053_read_count,BB_RT_1_340_243_Hek1053_read_count,BB_RT_1_340_335_Cos1053_read_count,BB_RT_1_340_335_Hek1053_read_count,BB_RT_1b_340_243_Hek1053_read_count,...,BB_RT_2_ODT_335_Cos1054_read_count,BB_RT_2_ODT_335_Hek1054_read_count,BB_RT_3_340_243_Cos1055_read_count,BB_RT_3_340_243_Hek1055_read_count,BB_RT_3_340_335_Cos1055_read_count,BB_RT_3_340_335_Hek1055_read_count,BB_RT_3_ODT_243_Cos1055_read_count,BB_RT_3_ODT_243_Hek1055_read_count,BB_RT_3_ODT_335_cos1055_read_count,BB_RT_3_ODT_335_Hek1055_read_count
isonum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iso2352,"((1267, 1359),)",2398566,5042945,2188404,6167843,1848520,3402481,498989,1002711,3088365,...,147921,670427,933745,2231432,641611,1423766,2632227,1602464,768068,1087995


In [20]:
# Read counts for the single-exon isoform (1267, 1350), which is the middle exon of 'iso00'
# in the junction graph built later in this notebook.
isogrp_df.loc[ isogrp_df.isoform == ((1267, 1350),) ]

Unnamed: 0_level_0,isoform,BB_test_Cos1053_JKLab0340_MM1B_read_count,BB_test_Cos1053_JKLab0340_MM2B_read_count,BB_test_Cos1053_oligodT_MM1B_read_count,BB_test_Cos1053_oligodT_MM2B_read_count,BB_RT_1_340_243_Cos1053_read_count,BB_RT_1_340_243_Hek1053_read_count,BB_RT_1_340_335_Cos1053_read_count,BB_RT_1_340_335_Hek1053_read_count,BB_RT_1b_340_243_Hek1053_read_count,...,BB_RT_2_ODT_335_Cos1054_read_count,BB_RT_2_ODT_335_Hek1054_read_count,BB_RT_3_340_243_Cos1055_read_count,BB_RT_3_340_243_Hek1055_read_count,BB_RT_3_340_335_Cos1055_read_count,BB_RT_3_340_335_Hek1055_read_count,BB_RT_3_ODT_243_Cos1055_read_count,BB_RT_3_ODT_243_Hek1055_read_count,BB_RT_3_ODT_335_cos1055_read_count,BB_RT_3_ODT_335_Hek1055_read_count
isonum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iso1216,"((1267, 1350),)",2199618,4600777,1959934,5723480,1639841,3024206,456883,942553,2722204,...,131835,643766,853119,1925367,606695,1466166,2361301,1371139,728822,1187494


In [21]:
# Capture the output of `ls` as a list: one *.haps.all.txt table per library.
satbls = ! ls /nfs/turbo/umms-kitzmanj/oldvol2/jacob/proj/jensplice/20220426_wt1_subasm_filter_stringent/sapipe/sa/*.haps.all.txt

In [22]:
# Load each *.haps.all.txt table, indexed by 'readgroupid', under its library name
# (the file name up to the first '.'). Despite the `_fn` suffix the values are DataFrames.
satbl_fn = {}
for sa in satbls:
    lib_name = sa.split( '/' )[ -1 ].split( '.' )[ 0 ]
    satbl_fn[ lib_name ] = pd.read_table( sa ).set_index( 'readgroupid' )

In [23]:
# Drop every column whose name starts with 'nbp_' from each library's table.
for lib, tbl in list( satbl_fn.items() ):
    kept_cols = [ col for col in tbl if not col.startswith( 'nbp_' ) ]
    satbl_fn[ lib ] = tbl[ kept_cols ].copy()

In [24]:
# BED file that jn.make_junction_graph is built from in the next cell.
exonbed = pbt.BedTool( '/nfs/kitzman2/smithcat/proj/wt1_2022/refs/wt1_ex9.bed' )

In [25]:
# Build the isoform set from the BED intervals; as displayed in the next cell, the result
# maps isoform ids ('iso00', ...) to tuples of (start, end) pairs.
isos = jn.make_junction_graph( exonbed )

In [26]:
# Show the isoforms derived from the BED file.
isos

{'iso00': ((650, 696), (1267, 1350), (3479, 3533)),
 'iso01': ((650, 696), (1267, 1359), (3479, 3533)),
 'iso02': ((650, 696), (3479, 3533))}

In [27]:
# Distinct exon-boundary coordinates from `isos` that lie strictly between 696 and 3479,
# i.e. between the two exons shared by every isoform in `isos`.
# sorted() pins the order, because set iteration order is not guaranteed. The loop variable
# is named `pos` rather than `jn` so it does not shadow the splanl.junction_scorer alias
# inside the comprehension.
unique_jns = sorted( { pos
                       for exons in isos.values()
                       for exon in exons
                       for pos in exon
                       if 696 < pos < 3479 } )

In [28]:
# Show the junction coordinates that will be passed to summarize_isos_by_var_bc_pe.
unique_jns

[1267, 1350, 1359]

In [29]:
# Open a fresh set of BAM handles, replacing the ones created earlier in the notebook.
# NOTE(review): presumably needed because the earlier isoform pass consumed the first set of
# readers — confirm. The previous handles are not closed here.
msamp_rnabam = { samp: pysam.AlignmentFile( bam_fn, 'rb' ) for samp, bam_fn in msamp_fn.items() }

In [30]:
# Preview the first rows of each library's table.
for lib, tbl in satbl_fn.items():
    print( lib )
    print( tbl.head() )

JKP1053
                         passes                              status  \
readgroupid                                                           
AAAAAAAATCACGACCCTCCTGG   False  possible_chimeric_no_major_variant   
AAAAAAAGGGCTTCCGGGTATGG    True                   no_variants_input   
AAAAAAAGGGTTCCAGACTGTGG   False                     toomanymajorvar   
AAAAAAAGTAGTCTGGTGTGTGG    True                                pass   
AAAAAAATACCCCGGATGATTGG   False  possible_chimeric_no_major_variant   

                         n_variants_passing  \
readgroupid                                   
AAAAAAAATCACGACCCTCCTGG                   6   
AAAAAAAGGGCTTCCGGGTATGG                   0   
AAAAAAAGGGTTCCAGACTGTGG                   2   
AAAAAAAGTAGTCTGGTGTGTGG                   1   
AAAAAAATACCCCGGATGATTGG                   1   

                                                              variant_list  
readgroupid                                                                 
AAAAAAAATC

In [None]:
%%time
#18 min/sample

iso_df_stats = { lib: jn.summarize_isos_by_var_bc_pe( { samp: bam for samp,bam in msamp_rnabam.items()
                                                        if lib.replace( 'JKP', '' ) in samp },
                                                      [ ( 649, 696 ), ( 3478, 3533 ) ],
                                                      satbl_fn[ lib ],
                                                      isogrp_df,
                                                      unique_jns,
                                                      [ ( ( 1266, 1350 ), ), ( ( 1266, 1359 ), ), () ],
                                                      spl_tol = 3,
                                                      indel_tol = 20,
                                                      min_matches_for = 90,
                                                      min_matches_rev = 70,
                                                      bc_tag = 'BC',
                                                     ) 
                  for lib in satbl_fn }

BB_test_Cos1053_JKLab0340_MM1B
Barcodes processed: 1000
Reads processed: 90524
Barcodes processed: 2000
Reads processed: 182528
Barcodes processed: 3000
Reads processed: 268259
Barcodes processed: 4000
Reads processed: 355293
Barcodes processed: 5000
Reads processed: 440992
Barcodes processed: 6000
Reads processed: 529596
Barcodes processed: 7000
Reads processed: 617027
Barcodes processed: 8000
Reads processed: 705801
Barcodes processed: 9000
Reads processed: 792668
Barcodes processed: 10000
Reads processed: 886122
Barcodes processed: 11000
Reads processed: 990842
Barcodes processed: 12000
Reads processed: 1079892
Barcodes processed: 13000
Reads processed: 1169247
Barcodes processed: 14000
Reads processed: 1249789
Barcodes processed: 15000
Reads processed: 1359041
Barcodes processed: 16000
Reads processed: 1449930
Barcodes processed: 17000
Reads processed: 1536675
Barcodes processed: 18000
Reads processed: 1622228
Barcodes processed: 19000
Reads processed: 1705752
Barcodes processed: 2

For isoform: ((1269, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1267:GTG:GG,jkp815:1365:T:C', 8), ('jkp815:1267:GTG:GG', 6), ('jkp815:1288:A:C', 3), ('jkp815:1269:G:T', 3), ('jkp815:1360:G:T', 3)]
For isoform: ((1267, 1274), (1296, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1273:AACCATTCCAGTGTAAAACTTGT:AT,jkp815:1316:C:T', 2)]
For isoform: ((1146, 1231), (1317, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1224:CCAC:CC,jkp815:1296:C:T,jkp815:1353:A:G', 1), ('jkp815:1312:G:T,jkp815:1392:T:A', 1), ('jkp815:1227:C:T', 1)]
For isoform: ((1267, 1312), (1351, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1252:T:G', 2), ('jkp815:1345:A:T', 1), ('jkp815:1327:A:T', 1), ('jkp815:1368:A:T', 1), ('jkp815:1339:G:T', 1)]
For isoform: ((1267, 1327),)
The variants with the top 5 number of barcodes are:
[('jkp815:1277:A:C,jkp815:1309:C:G,jkp815:1337:C:T,jkp815:1343:T:A', 1), ('jkp815:1227:C:A,jkp815:1316:

For isoform: ((1267, 1284), (1318, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1228:A:C', 2), ('jkp815:1228:A:G', 2), ('jkp815:1366:A:C', 2), ('jkp815:1318:A:T', 2), ('jkp815:1345:A:G', 2)]
For isoform: ((1267, 1355), (2222, 2227))
The variants with the top 5 number of barcodes are:
[('jkp815:1352:TAAAACAAG:TAAAAACAAG', 1), ('jkp815:1298:G:T,jkp815:1308:TCCCGGT:TCCGGT,jkp815:1352:TAAAACAAG:TAAAAACAAG', 1)]
For isoform: ((1273, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1246:A:C', 2), ('jkp815:1389:T:A', 2), ('jkp815:1310:C:G', 1), ('jkp815:1227:C:G', 1), ('jkp815:1283:G:C', 1)]
For isoform: ((1267, 1322), (1353, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1307:C:G', 1), ('jkp815:1353:A:C', 1), ('jkp815:1369:CTTTTCTTCA:CTTTCTTCA,jkp815:1381:T:A', 1), ('jkp815:1383:T:A', 1), ('jkp815:1246:AGA:AA', 1)]
For isoform: ((1267, 1304), (1336, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1287:A:T

For isoform: ((1236, 1256), (1297, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1312:G:T', 1), ('jkp815:1231:G:A,jkp815:1247:G:T,jkp815:1347:A:T,jkp815:1362:GCG:GG,jkp815:1388:T:A', 1), ('jkp815:1287:A:C', 1), ('jkp815:1327:A:T', 1), ('jkp815:1229:TTGTTAGGGCCGA:TA', 1)]
For isoform: ((1281, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1389:T:C', 1), ('jkp815:1284:TGT:TT', 1)]
For isoform: ((1204, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1295:TCA:TCCA,jkp815:1377:C:A', 1), ('jkp815:1322:C:T', 1), ('jkp815:1360:GTGCGTAAA:GA', 1), ('jkp815:1323:C:G,jkp815:1340:G:C', 1), ('jkp815:1373:T:A', 1)]
For isoform: ((1267, 1287), (1326, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1228:A:T', 1), ('jkp815:1251:T:C', 1), ('jkp815:1241:A:G,jkp815:1349:AGGTA:AGTA', 1), ('jkp815:1305:T:G,jkp815:1399:T:A', 1)]
For isoform: ((1267, 1343), (1550, 1556))
The variants with the top 5 number of barcodes are:


For isoform: ((1267, 1294), (1324, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1297:A:T', 1)]
For isoform: ((1237, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1222:GCCCACATTGTTAG:GCCACATA,jkp815:1241:AGGCTAGA:AC,jkp815:1253:C:T,jkp815:1255:CTGTCCATT:CT', 1), ('jkp815:1335:A:C', 1)]
For isoform: ((1267, 1336), (2036, 2042), (2236, 2243))
The variants with the top 5 number of barcodes are:
[('jkp815:1337:CAGGACTCATAC:CC', 1)]
For isoform: ((1268, 1293), (1334, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1293:TGTCAGCGAAAGTTCTCCCGGTCCGACCACCTGAAGACCCAC:TC', 1)]
For isoform: ((1146, 1223), (1309, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1344:C:G', 1)]
For isoform: ((1146, 1222), (1309, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1359:A:T', 1)]
For isoform: ((1267, 1319), (1352, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1330:C:T', 1)]
For

Barcodes processed: 1000
Reads processed: 223951
Barcodes processed: 2000
Reads processed: 469105
Barcodes processed: 3000
Reads processed: 677309
Barcodes processed: 4000
Reads processed: 895975
Barcodes processed: 5000
Reads processed: 1113682
Barcodes processed: 6000
Reads processed: 1334691
Barcodes processed: 7000
Reads processed: 1542224
Barcodes processed: 8000
Reads processed: 1783514
Barcodes processed: 9000
Reads processed: 2023828
Barcodes processed: 10000
Reads processed: 2245091
Barcodes processed: 11000
Reads processed: 2464203
Barcodes processed: 12000
Reads processed: 2706719
Barcodes processed: 13000
Reads processed: 2951270
Barcodes processed: 14000
Reads processed: 3173814
Barcodes processed: 15000
Reads processed: 3382647
Barcodes processed: 16000
Reads processed: 3592452
Barcodes processed: 17000
Reads processed: 3816353
Barcodes processed: 18000
Reads processed: 4027263
Barcodes processed: 19000
Reads processed: 4249861
Barcodes processed: 20000
Reads processed: 4

The variants with the top 5 number of barcodes are:
[('jkp815:1309:C:T', 2), ('jkp815:1226:ACA:AA', 1), ('jkp815:1390:C:T,jkp815:1394:A:T', 1), ('jkp815:1264:T:G', 1), ('jkp815:1288:A:T', 1)]
For isoform: ((1305, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1227:C:T', 2), ('jkp815:1299:C:G', 2), ('jkp815:1351:G:T,jkp815:1359:A:G', 1), ('jkp815:1261:A:G,jkp815:1392:T:C', 1), ('jkp815:1239:C:A', 1)]
For isoform: ((1267, 1336), (2978, 2985))
The variants with the top 5 number of barcodes are:
[('jkp815:1389:T:A', 2), ('jkp815:1311:C:G,jkp815:1361:TGCGTAA:TA', 1), ('jkp815:1259:C:T', 1), ('jkp815:1238:C:A,jkp815:1293:T:C,jkp815:1307:C:G', 1), ('jkp815:1395:T:C', 1)]
For isoform: ((1269, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1227:C:T', 7), ('jkp815:1227:C:A', 6), ('jkp815:1228:A:T', 5), ('jkp815:1324:T:C', 5), ('jkp815:1358:A:G', 5)]
For isoform: ((1268, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1228:A:T', 

The variants with the top 5 number of barcodes are:
[('jkp815:1227:C:T', 2), ('jkp815:1367:A:T', 1), ('jkp815:1263:T:C', 1), ('jkp815:1245:T:A', 1), ('jkp815:1247:G:T', 1)]
For isoform: ((1329, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1237:G:A,jkp815:1253:C:T,jkp815:1347:A:C', 1), ('jkp815:1261:A:T,jkp815:1311:CGGTC:CGTC', 1), ('jkp815:1228:A:G', 1), ('jkp815:1295:T:C,jkp815:1369:C:A', 1), ('jkp815:1389:T:A', 1)]
For isoform: ((1190, 1220), (1292, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1237:G:A,jkp815:1253:C:T,jkp815:1347:A:C', 1), ('jkp815:1240:G:T', 1), ('jkp815:1228:A:G', 1), ('jkp815:1274:A:G,jkp815:1299:C:T', 1), ('jkp815:1298:G:T', 1)]
For isoform: ((1267, 1286), (1318, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1246:A:C,jkp815:1367:A:G', 1), ('jkp815:1242:G:T,jkp815:1281:CAGTGTAAAACTTGTCAGCGAAAGTTCTCCCGGTCCGACCACCTGAAGACCCAC:CC,jkp815:1349:A:T', 1), ('jkp815:1235:G:A', 1), ('jkp815:1330:C:T', 1)

For isoform: ((1267, 1283), (1321, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1227:C:A', 2), ('jkp815:1274:A:C,jkp815:1279:TCCAG:TCAG', 1), ('jkp815:1228:A:G', 1), ('jkp815:1254:T:C,jkp815:1281:C:A,jkp815:1348:C:A', 1), ('jkp815:1334:C:A', 1)]
For isoform: ((1267, 1293), (1328, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1275:C:A,jkp815:1349:AGGTA:AGTA', 1), ('jkp815:1227:C:A,jkp815:1236:G:A', 1), ('jkp815:1312:G:T,jkp815:1337:C:T', 1), ('jkp815:1307:C:T,jkp815:1321:A:G', 1), ('jkp815:1254:T:G,jkp815:1313:G:T', 1)]
For isoform: ((1267, 1350), (3271, 3276))
The variants with the top 5 number of barcodes are:
[('jkp815:1331:C:A,jkp815:1351:GTAAAAC:GC,jkp815:1399:T:G', 1)]
For isoform: ((1267, 1348), (2340, 2345))
The variants with the top 5 number of barcodes are:
[('jkp815:1331:C:A,jkp815:1351:GTAAAAC:GC,jkp815:1399:T:G', 1)]
For isoform: ((1190, 1224), (1298, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1384:ATT

[('jkp815:1229:T:C', 1), ('jkp815:1309:C:T', 1), ('jkp815:1340:G:T,jkp815:1353:A:C', 1)]
For isoform: ((1269, 1345),)
The variants with the top 5 number of barcodes are:
[('jkp815:1344:CATACAGGTAAAACAA:CA', 1)]
For isoform: ((1146, 1225), (1307, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1271:GAAACCAT:GAACCAT', 1), ('jkp815:1319:C:A,jkp815:1360:G:T', 1), ('jkp815:1398:T:G', 1), ('jkp815:1311:C:T', 1), ('jkp815:1289:A:T', 1)]
For isoform: ((1236, 1250), (1284, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1262:T:A,jkp815:1369:C:G', 1), ('jkp815:1373:TCT:TT,jkp815:1386:T:G', 1), ('jkp815:1295:T:G,jkp815:1377:C:A', 1), ('jkp815:1227:C:A', 1), ('jkp815:1357:C:A', 1)]
For isoform: ((1267, 1351), (2584, 2591))
The variants with the top 5 number of barcodes are:
[('jkp815:1351:GTA:GA', 1), ('jkp815:1351:GTA:GA,jkp815:1365:T:A', 1)]
For isoform: ((1255, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1349:A:G', 1), ('jkp815

For isoform: ((1146, 1221), (1298, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1286:T:C', 1)]
For isoform: ((1267, 2291),)
The variants with the top 5 number of barcodes are:
[('jkp815:1311:C:G', 1)]
For isoform: ((1267, 1352), (2706, 2712))
The variants with the top 5 number of barcodes are:
[('jkp815:1249:C:A,jkp815:1354:A:C', 1)]
For isoform: ((1267, 1295), (1336, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1294:GTCAGCGAAAGTTCTCCCGGTCCGACCACCTGAAGACCCACA:GA', 1)]
For isoform: ((1267, 1352), (1449, 1454))
The variants with the top 5 number of barcodes are:
[('jkp815:1295:T:G,jkp815:1377:C:A', 1)]
For isoform: ((1152, 1224), (1287, 1317), (1318, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1296:C:A', 1)]
For isoform: ((1190, 1225), (1290, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1233:T:G,jkp815:1244:C:G,jkp815:1335:A:G', 1), ('jkp815:1273:A:T,jkp815:1299:C:A,jkp815:1347:A:C', 1), ('jk

Barcodes processed: 1000
Reads processed: 75002
Barcodes processed: 2000
Reads processed: 150772
Barcodes processed: 3000
Reads processed: 223496
Barcodes processed: 4000
Reads processed: 297275
Barcodes processed: 5000
Reads processed: 364971
Barcodes processed: 6000
Reads processed: 438159
Barcodes processed: 7000
Reads processed: 509686
Barcodes processed: 8000
Reads processed: 580078
Barcodes processed: 9000
Reads processed: 650319
Barcodes processed: 10000
Reads processed: 729880
Barcodes processed: 11000
Reads processed: 806650
Barcodes processed: 12000
Reads processed: 892126
Barcodes processed: 13000
Reads processed: 968554
Barcodes processed: 14000
Reads processed: 1045463
Barcodes processed: 15000
Reads processed: 1115679
Barcodes processed: 16000
Reads processed: 1199414
Barcodes processed: 17000
Reads processed: 1280710
Barcodes processed: 18000
Reads processed: 1350380
Barcodes processed: 19000
Reads processed: 1429798
Barcodes processed: 20000
Reads processed: 1499014
Bar

For isoform: ((1269, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1267:GTG:GG', 7), ('jkp815:1267:GTG:GG,jkp815:1365:T:C', 5), ('jkp815:1228:A:G', 4), ('jkp815:1227:C:T', 4), ('jkp815:1366:A:T', 3)]
For isoform: ((1269, 1359),)
The variants with the top 5 number of barcodes are:
[('jkp815:1267:GTG:GG', 12), ('jkp815:1227:C:G', 6), ('jkp815:1227:C:A', 4), ('jkp815:1362:G:A', 4), ('jkp815:1228:A:C', 4)]
For isoform: ((1267, 1323), (1353, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1253:C:G,jkp815:1323:CTGAAGACCCACACCAGGACTCATACAGGTA:CA', 2), ('jkp815:1266:G:C', 1), ('jkp815:1381:T:A', 1), ('jkp815:1262:T:A', 1), ('jkp815:1388:T:G', 1)]
For isoform: ((1267, 1279), (1322, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1227:C:T', 2), ('jkp815:1279:T:G', 2), ('jkp815:1238:C:G,jkp815:1323:C:T', 1), ('jkp815:1312:G:T', 1), ('jkp815:1273:A:G', 1)]
For isoform: ((1136, 1359),)
The variants with the top 5 number of barcodes a

For isoform: ((1190, 1225), (1291, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1347:A:C', 1), ('jkp815:1250:C:A', 1), ('jkp815:1355:A:C', 1), ('jkp815:1268:T:G,jkp815:1301:AAAGTTCTCCCGGTCCGACCACCTGAAGACCCACACCAGGACTCATA:AA', 1), ('jkp815:1339:G:T', 1)]
For isoform: ((1236, 1244), (1290, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1288:A:T', 1), ('jkp815:1245:T:C', 1), ('jkp815:1332:C:G', 1), ('jkp815:1207:TGT:TGGT', 1), ('jkp815:1253:C:T', 1)]
For isoform: ((1267, 1341), (2393, 2399))
The variants with the top 5 number of barcodes are:
[('jkp815:1335:A:G', 1), ('jkp815:1335:A:C', 1), ('jkp815:1378:A:T', 1), ('jkp815:1333:A:T', 1), ('jkp815:1327:A:T', 1)]
For isoform: ((1248, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1259:C:T,jkp815:1336:CCAGGACTCATACAGG:CG', 1), ('jkp815:1226:ACA:AA,jkp815:1337:C:T', 1), ('jkp815:1373:TCT:TT,jkp815:1386:T:G', 1), ('jkp815:1230:TGTTAGGGCCGAGGCTAGACCTTCTCTGTCCAT:TT,jkp815:1230:

For isoform: ((1267, 1284), (1329, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1332:C:G,jkp815:1340:G:C', 1), ('jkp815:1330:C:T,jkp815:1343:T:C', 1), ('jkp815:1344:C:T,jkp815:1347:A:G', 1)]
For isoform: ((1267, 1347),)
The variants with the top 5 number of barcodes are:
[('jkp815:1244:CTA:CA,jkp815:1304:G:T', 1), ('jkp815:1347:ACA:AA', 1), ('jkp815:1227:C:A', 1), ('jkp815:1225:CAC:CC', 1), ('jkp815:1272:A:T,jkp815:1328:G:T', 1)]
For isoform: ((1267, 1326),)
The variants with the top 5 number of barcodes are:
[('jkp815:1302:A:T', 1), ('jkp815:1294:G:C', 1), ('jkp815:1322:CCTGAAGACCCACACCAGGACTCAT:CT,jkp815:1362:G:T', 1), ('jkp815:1244:C:A', 1), ('jkp815:1250:C:G,jkp815:1270:T:C,jkp815:1323:CTGAAGACCCACACCAGGA:CA', 1)]
For isoform: ((1236, 1261), (1314, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1306:T:G', 1), ('jkp815:1260:C:G', 1), ('jkp815:1249:C:A,jkp815:1301:A:C', 1), ('jkp815:1281:C:G', 1), ('jkp815:1307:C:A', 1)]
For isoform: ((12

For isoform: ((1321, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1361:T:G', 1)]
For isoform: ((1267, 1339), (1762, 1768))
The variants with the top 5 number of barcodes are:
[('jkp815:1337:C:G,jkp815:1360:G:A', 1)]
For isoform: ((1267, 1282), (1328, 1350))
The variants with the top 5 number of barcodes are:
[('jkp815:1355:A:G', 1)]
For isoform: ((1200, 1350),)
The variants with the top 5 number of barcodes are:
[('jkp815:1378:A:T', 1), ('jkp815:1358:A:G', 1), ('jkp815:1226:A:C,jkp815:1296:C:A', 1), ('jkp815:1234:A:C,jkp815:1323:C:A', 1)]
For isoform: ((1267, 1353), (2485, 2491))
The variants with the top 5 number of barcodes are:
[('jkp815:1352:TAAAACAAG:TAAAAACAAG', 1)]
For isoform: ((1146, 1224), (1307, 1359))
The variants with the top 5 number of barcodes are:
[('jkp815:1281:C:G', 1), ('jkp815:1329:A:G', 1)]
For isoform: ((1267, 1331), (1913, 1917), (2978, 2985))
The variants with the top 5 number of barcodes are:
[('jkp815:1260:C:A,jkp815:1339:GGACTCATACAG

Barcodes processed: 1000
Reads processed: 239643
Barcodes processed: 2000
Reads processed: 475391
Barcodes processed: 3000
Reads processed: 688690
Barcodes processed: 4000
Reads processed: 922523
Barcodes processed: 5000
Reads processed: 1130029
Barcodes processed: 6000
Reads processed: 1362616
Barcodes processed: 7000
Reads processed: 1579124
Barcodes processed: 8000
Reads processed: 1785785
Barcodes processed: 9000
Reads processed: 2008558
Barcodes processed: 10000
Reads processed: 2247305
Barcodes processed: 11000
Reads processed: 2498811
Barcodes processed: 12000
Reads processed: 2740206
Barcodes processed: 13000
Reads processed: 2971192


In [53]:
# Dimensions of each library's isoform summary table.
for lib, stats in iso_df_stats.items():
    print( lib, stats.shape )

JKP1053 (6519, 117)
JKP1054 (6519, 77)
JKP1055 (6519, 77)


In [54]:
# Table dimensions after keeping only the rows with total_passfilt > 0.
for lib, stats in iso_df_stats.items():
    passing = stats.query( 'total_passfilt > 0' )
    print( lib, passing.shape )

JKP1053 (246, 117)
JKP1054 (234, 77)
JKP1055 (271, 77)


In [55]:
# Percent of total_read_count that belongs to rows with total_passfilt == 0.
for lib, stats in iso_df_stats.items():
    failed = stats.query( 'total_passfilt == 0' )
    print( lib, ( failed.total_read_count.sum() / stats.total_read_count.sum() )*100 )

JKP1053 0.2577983139205421
JKP1054 0.44423740668137823
JKP1055 0.4259980693578306


In [56]:
# Percent of total_num_bcs that belongs to rows with total_passfilt == 0.
for lib, stats in iso_df_stats.items():
    failed = stats.query( 'total_passfilt == 0' )
    print( lib, ( failed.total_num_bcs.sum() / stats.total_num_bcs.sum() )*100 )

JKP1053 0.3260256995463436
JKP1054 0.4095385378261281
JKP1055 0.2074927016579704


In [57]:
# Show the summary table for library JKP1053 (the display is truncated by pandas).
iso_df_stats[ 'JKP1053' ]

Unnamed: 0_level_0,isoform,BB_test_Cos1053_JKLab0340_MM1B_read_count,BB_test_Cos1053_JKLab0340_MM2B_read_count,BB_test_Cos1053_oligodT_MM1B_read_count,BB_test_Cos1053_oligodT_MM2B_read_count,BB_RT_1_340_243_Cos1053_read_count,BB_RT_1_340_243_Hek1053_read_count,BB_RT_1_340_335_Cos1053_read_count,BB_RT_1_340_335_Hek1053_read_count,BB_RT_1b_340_243_Hek1053_read_count,...,BB_RT_1_ODT_335_Cos1053_filter,BB_RT_1_ODT_335_Hek1053_num_bcs,BB_RT_1_ODT_335_Hek1053_num_vars,BB_RT_1_ODT_335_Hek1053_max_reads_per_bc,BB_RT_1_ODT_335_Hek1053_max_bc_per_var,BB_RT_1_ODT_335_Hek1053_filter,total_read_count,total_num_bcs,total_num_vars,total_passfilt
isonum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iso0000,"((1136, 1225), (1301, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,3,1.0,1.0,0
iso0001,"((1267, 1290), (1320, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,2,1.0,1.0,0
iso0002,"((1267, 1314), (1337, 1359))",1,272,0,0,1,127,64,59,140,...,0.0,10.0,6.0,7.0,5.0,3.0,1105,170.0,129.0,9
iso0003,"((1267, 1282), (1328, 1359))",0,0,2,0,0,0,0,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,11,2.0,2.0,0
iso0004,"((1136, 1224), (1306, 1359))",0,0,0,0,0,1,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,4,1.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
iso6514,"((1161, 1228), (1308, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0.0,0
iso6515,"((1236, 1242), (1266, 1350))",0,0,0,0,0,0,0,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,1.0,0
iso6516,"((1146, 1222), (1281, 1359))",0,0,0,0,0,3,0,0,3,...,0.0,0.0,0.0,0.0,0.0,0.0,12,1.0,1.0,0
iso6517,"((1284, 1348),)",0,1,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,3,1.0,1.0,0


In [None]:
# Base output directory for the isoform tables written by later cells.
# NOTE(review): hardcoded absolute path - consider a configurable data
# directory so the notebook can run on another machine.
bdout = '/nfs/kitzman2/smithcat/proj/wt1_2022/ex9_data/'

In [None]:
# Write each library's isoform table to its own tab-separated file in bdout.
# reset_index() turns the index back into a regular column so it is kept in
# the file even though index = False drops the positional index.
for lib, lib_stats in iso_df_stats.items():
    # op.join only inserts a path separator when bdout does not already end
    # with one, so the file lands inside bdout whether or not the directory
    # string carries a trailing slash (plain string concatenation did not).
    out_path = op.join( bdout, 'wt1_ex9_%s_isoforms_2022-0514.txt' % lib )
    lib_stats.reset_index().to_csv( out_path,
                                    sep = '\t',
                                    index = False )

In [None]:
# Outer-join the JKP1053 and JKP1054 isoform tables on their index so rows
# found in only one of the two libraries are kept. Column names present in
# both tables receive a per-library suffix.
iso_df_stats_all = pd.merge( iso_df_stats[ 'JKP1053' ],
                             iso_df_stats[ 'JKP1054' ],
                             how = 'outer',
                             left_index = True,
                             right_index = True,
                             suffixes = ( '_JKP1053', '_JKP1054' ) )

In [58]:
# Display the combined isoform table (pandas truncates the rendering to the
# first/last rows and columns).
iso_df_stats_all

Unnamed: 0_level_0,isoform_JKP1053,BB_test_Cos1053_JKLab0340_MM1B_read_count_JKP1053,BB_test_Cos1053_JKLab0340_MM2B_read_count_JKP1053,BB_test_Cos1053_oligodT_MM1B_read_count_JKP1053,BB_test_Cos1053_oligodT_MM2B_read_count_JKP1053,BB_RT_1_340_243_Cos1053_read_count_JKP1053,BB_RT_1_340_243_Hek1053_read_count_JKP1053,BB_RT_1_340_335_Cos1053_read_count_JKP1053,BB_RT_1_340_335_Hek1053_read_count_JKP1053,BB_RT_1b_340_243_Hek1053_read_count_JKP1053,...,BB_RT_3_ODT_335_Hek1055_max_bc_per_var,BB_RT_3_ODT_335_Hek1055_filter,total_read_count_JKP1055,total_num_bcs_JKP1055,total_num_vars_JKP1055,total_passfilt_JKP1055,total_read_count,total_num_bcs,total_num_vars,total_passfilt
isonum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iso0000,"((1136, 1225), (1301, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,3,0.0,0.0,0,9,1.0,1.0,0
iso0001,"((1267, 1290), (1320, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,2,0.0,0.0,0,6,1.0,1.0,0
iso0002,"((1267, 1314), (1337, 1359))",1,272,0,0,1,127,64,59,140,...,0.0,0.0,1105,0.0,0.0,0,3315,170.0,129.0,9
iso0003,"((1267, 1282), (1328, 1359))",0,0,2,0,0,0,0,1,0,...,0.0,0.0,11,0.0,0.0,0,33,2.0,2.0,0
iso0004,"((1136, 1224), (1306, 1359))",0,0,0,0,0,1,0,0,2,...,0.0,0.0,4,0.0,0.0,0,12,1.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
iso6514,"((1161, 1228), (1308, 1350))",0,0,0,0,0,0,0,0,0,...,0.0,0.0,2,1.0,1.0,0,6,1.0,1.0,0
iso6515,"((1236, 1242), (1266, 1350))",0,0,0,0,0,0,0,0,1,...,0.0,0.0,1,0.0,0.0,0,3,1.0,1.0,0
iso6516,"((1146, 1222), (1281, 1359))",0,0,0,0,0,3,0,0,3,...,0.0,0.0,12,0.0,0.0,0,36,1.0,1.0,0
iso6517,"((1284, 1348),)",0,1,0,0,0,0,0,0,0,...,1.0,0.0,3,1.0,1.0,0,9,2.0,2.0,0


In [None]:
# Add the JKP1055 library to the combined JKP1053/JKP1054 table.
# JKP1055's total_* columns are renamed up front so they carry a library
# suffix; any other column name that clashes with the left table gets
# '_JKP1055' appended by the merge itself.
jkp1055_stats = iso_df_stats[ 'JKP1055' ]
jkp1055_total_renames = { col: col + '_JKP1055'
                          for col in jkp1055_stats
                          if col.startswith( 'total_' ) }

# This cell reassigns iso_df_stats_all from itself, so running it twice used
# to merge JKP1055 a second time and create 'total_*_JKP1055_JKP1055' columns,
# which any substring-based sum over the 'total_*_' columns would then count
# twice. Only merge while the renamed JKP1055 totals are not in the table yet.
if not set( jkp1055_total_renames.values() ).intersection( iso_df_stats_all.columns ):
    iso_df_stats_all = iso_df_stats_all.merge( jkp1055_stats.rename( columns = jkp1055_total_renames ),
                                                left_index = True,
                                                right_index = True,
                                                how = 'outer',
                                                suffixes = ( '', '_JKP1055' )
                                              )

In [None]:
# For every summary statistic, add up (row by row) all columns whose name
# contains '<statistic>_' and store the result under the bare statistic name.
total_cols = [ 'total_read_count', 'total_num_bcs', 'total_num_vars', 'total_passfilt' ]

for col in total_cols:
    marker = col + '_'
    matching_cols = [ name for name in iso_df_stats_all.columns if marker in name ]
    iso_df_stats_all[ col ] = iso_df_stats_all[ matching_cols ].sum( axis = 1 )

In [59]:
# Number of entries in `bam`. `bam` is defined outside this excerpt -
# presumably the combined list of BAM paths; TODO confirm.
len( bam )

32

In [60]:
# (rows, columns) of the combined isoform table.
iso_df_stats_all.shape

(6519, 275)

In [61]:
# Shape of the combined table after keeping only rows with total_passfilt > 0.
iso_df_stats_all[ iso_df_stats_all[ 'total_passfilt' ] > 0 ].shape

(458, 275)

In [62]:
# Shape of the combined table after keeping only rows with total_passfilt >= 16.
# NOTE(review): 16 is presumably half of the 32 entries reported by
# len( bam ) - confirm and consider naming the threshold.
iso_df_stats_all[ iso_df_stats_all[ 'total_passfilt' ] >= 16 ].shape

(68, 275)

In [63]:
# Percentage of total_read_count found in rows with total_passfilt == 0.
zero_pass_mask = iso_df_stats_all[ 'total_passfilt' ] == 0
( iso_df_stats_all.loc[ zero_pass_mask, 'total_read_count' ].sum() / iso_df_stats_all[ 'total_read_count' ].sum() )*100

0.11690630375482747

In [64]:
# Percentage of total_read_count found in rows with total_passfilt >= 16.
high_pass_mask = iso_df_stats_all[ 'total_passfilt' ] >= 16
( iso_df_stats_all.loc[ high_pass_mask, 'total_read_count' ].sum() / iso_df_stats_all[ 'total_read_count' ].sum() )*100

99.44893871196696

In [65]:
# Percentage of total_num_bcs found in rows with total_passfilt == 0.
zero_pass_mask = iso_df_stats_all[ 'total_passfilt' ] == 0
( iso_df_stats_all.loc[ zero_pass_mask, 'total_num_bcs' ].sum() / iso_df_stats_all[ 'total_num_bcs' ].sum() )*100

0.22911197377290127

In [66]:
# Percentage of total_num_bcs found in rows with total_passfilt >= 16.
high_pass_mask = iso_df_stats_all[ 'total_passfilt' ] >= 16
( iso_df_stats_all.loc[ high_pass_mask, 'total_num_bcs' ].sum() / iso_df_stats_all[ 'total_num_bcs' ].sum() )*100

99.00864433284829

In [67]:
# Count rows whose combined total_num_bcs value is missing.
iso_df_stats_all[ 'total_num_bcs' ].isna().sum()

0

In [None]:
# Write the combined isoform table to a tab-separated file in bdout.
# reset_index() turns the index back into a regular column so it is kept in
# the file even though index = False drops the positional index.
# op.join only inserts a path separator when bdout does not already end with
# one, so the file lands inside bdout whether or not the directory string
# carries a trailing slash (plain string concatenation did not).
combined_out_path = op.join( bdout, 'wt1_ex9_isoforms_2022-0514.txt' )
iso_df_stats_all.reset_index().to_csv( combined_out_path,
                                       sep = '\t',
                                       index = False )