In [1]:
import numpy as np
import os
import timeit
import glob 
import pandas as pd
import time
import multiprocessing as mp 
import logging
import sys 
import pathlib
from pathlib import Path
import matplotlib.pyplot as plt 

Matplotlib created a temporary config/cache directory at /scratch/slurm-job.871838/matplotlib-xr3ba5k0 because the default path (/cluster/customapps/biomed/grlab/users/prelotla/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
Matplotlib is building the font cache; this may take a moment.


In [2]:
def get_junction_coordinates(df, coordinates_col, sep=':'):
    df['strand'] = None
    df['junction_coordinate'] = None

    for idx, row in df.iterrows():
        kmer_coordinates = [int(x) for x in row[coordinates_col].split(sep) if (x !='None') ]

        if kmer_coordinates[1] < kmer_coordinates[2]: # order strand +

            df.loc[idx, 'strand'] = '+'
            if len(kmer_coordinates) == 4:  # 2 exons
                df.loc[idx, 'junction_coordinate'] = ':'.join([str(x) for x in kmer_coordinates[1:3]])
            elif len(kmer_coordinates) == 6:
                df.loc[idx, 'junction_coordinate'] = ':'.join([str(x) for x in kmer_coordinates[1:5]])
        else: # order strand +
            df.loc[idx, 'strand'] = '-'
            if len(kmer_coordinates) == 4:  # 2 exons
                df.loc[idx, 'junction_coordinate'] = ':'.join([str(x) for x in [kmer_coordinates[3],
                                                                                kmer_coordinates[0]]])
            elif len(kmer_coordinates) == 6:
                df.loc[idx, 'junction_coordinate'] = ':'.join([str(x) for x in [kmer_coordinates[3],
                                                                                kmer_coordinates[0],
                                                                                kmer_coordinates[2],
                                                                                kmer_coordinates[5]
                                                                               ]])
    return df




### Get filtered data

In [8]:
# Inputs
run_type = 'ov'

if run_type == 'brca':
    target_samples = ['TCGA-C8-A12P-01A-11R-A115-07.all',
                      'TCGA-AO-A0JM-01A-21R-A056-07.all',
                      'TCGA-BH-A18V-01A-11R-A12D-07.all',
                      'TCGA-A2-A0D2-01A-21R-A034-07.all',
                      'TCGA-A2-A0SX-01A-12R-A084-07.all']
    sample_target = 'TCGA-AO-A0JM-01A-21R-A056-07'
    basedir = '/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/CANCER_eth/commit_c4dd02c_conf2_Frame_cap0_runs/TCGA_Breast_1102'
    intermediate_output = os.path.join(basedir, 'filtering_intermediate/complete_cancer_candidates_order_r.tsv.gz')
elif run_type == 'ov':
    target_samples = ['TCGA-25-1319-01A-01R-1565-13.all',
                      'TCGA-25-1313-01A-01R-1565-13.all',
                      'TCGA-61-2008-01A-02R-1568-13.all',
                      'TCGA-24-1431-01A-01R-1566-13.all',
                      'TCGA-24-2298-01A-01R-1569-13.all']
    sample_target = 'TCGA-25-1319-01A-01R-1565-13'
    basedir = '/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/CANCER_eth/commit_c4dd02c_conf2_Frame_cap0_runs/TCGA_Ovarian_374'
    intermediate_output = os.path.join(basedir, 'filtering_intermediate/complete_cancer_candidates_order_r.tsv.gz')


# Outputs
filtering_id = 'filters_22March_order_wany_wAnnot'
output_dir = os.path.join(basedir, 'filtering_samples', filtering_id)

In [4]:
# Load generation matrix
df_load = pd.read_csv(intermediate_output, sep = '\t')

In [9]:
# Load filtered kmers
path_interest = f'G_{sample_target}_SampleLim0.0CohortLimNoneAcrossNone_FiltNormalsGtexCohortCohortlim0.0Across1.tsv.gz'
path_interest = os.path.join(output_dir, path_interest)
filt = pd.read_csv(path_interest, sep = '\t')
print(filt.shape)

(131575, 4)


In [10]:
# Merge generation matrix & filtered kmers
filt_meta = df_load.merge(filt, on = list(filt.columns), how = 'right')
print(filt_meta.shape)

(131962, 24)


In [11]:
# Define problematic kmer
# exp_lim = 600
# filt_meta_pb = filt_meta.loc[filt_meta['cancerCohortfilter >0.0'] > exp_lim]
# filt_meta_pb = filt_meta_pb[filt_meta_pb[sample_target.replace('-', '') + 'all'] > 0 ]
# print(f'filtered kmers-junctions {filt_meta.shape[0]}')
# print(f'filtered kmers unique {len(filt_meta.kmer.unique())}')
# print(f'filtered kmers-junctions problematic {filt_meta_pb.shape[0]}')
# print(f'filtered kmers problematic unique {len(filt_meta_pb.kmer.unique())}')
filt_meta_pb = filt_meta

In [12]:
# Columns magic
core_cols = ['kmer', 'gtexCohortfilter >0.0', 'coord',
       'junctionAnnotated', 'readFrameAnnotated', 'isCrossJunction', 'batch',
       'cancerCohortfilter >0.0', sample_target.replace('-', '') + 'all', 'isAnnotated']

df_load.columns

Index(['kmer', 'gtexCohortfilter >0.0', 'gtexCohortfilter >=1.0',
       'gtexCohortfilter >=2.0', 'gtexCohortfilter >=3.0',
       'gtexCohortfilter >=5.0', 'gtexCohortfilter >=10.0', 'coord',
       'junctionAnnotated', 'readFrameAnnotated', 'isCrossJunction', 'batch',
       'cancerCohortfilter >0.0', 'cancerCohortfilter >=1.0',
       'cancerCohortfilter >=2.0', 'cancerCohortfilter >=3.0',
       'cancerCohortfilter >=5.0', 'cancerCohortfilter >=10.0',
       'TCGA25131901A01R156513all', 'TCGA25131301A01R156513all',
       'TCGA61200801A02R156813all', 'TCGA24143101A01R156613all',
       'TCGA24229801A01R156913all', 'isAnnotated'],
      dtype='object')

In [13]:
# Extract coordinates of the merged table
filt_meta_pb = get_junction_coordinates(filt_meta_pb, 'coord', sep=':')

display(filt_meta_pb[core_cols + ['junction_coordinate']].head())

Unnamed: 0,kmer,gtexCohortfilter >0.0,coord,junctionAnnotated,readFrameAnnotated,isCrossJunction,batch,cancerCohortfilter >0.0,TCGA25131901A01R156513all,isAnnotated,junction_coordinate
0,RMREATKGP,,55350885:55350897:55352540:55352555:None:None,False,True,True,16543,2,10.91096,,55350897:55352540
1,QVERMREAT,,55350876:55350897:55352540:55352546:None:None,False,True,True,16543,2,10.91096,,55350897:55352540
2,VERMREATK,,55350879:55350897:55352540:55352549:None:None,False,True,True,16543,2,10.91096,,55350897:55352540
3,MREATKGPV,,55350888:55350897:55352540:55352558:None:None,False,True,True,16543,2,10.91096,,55350897:55352540
4,VQVERMREA,,55350873:55350897:55352540:55352543:None:None,False,True,True,16543,2,10.91096,,55350897:55352540


### Add the peptide metadata

In [14]:
# Define peptide file columns of interest 
cols_correct = ['peptide','id','readFrame','readFrameAnnotated','geneName','geneChr','geneStrand',
'mutationMode','hasStopCodon','isInJunctionList',
'isIsolated','variantComb','variantSegExpr','modifiedExonsCoord',
'originalExonsCoord',
'vertexIdx','kmerType', 'dummy1', 'dummy2', 'dummy3']

cols_pep_file = ['peptide', 'id', 'readFrame', 'geneName',
       'geneChr', 'geneStrand', 'mutationMode',
       'hasStopCodon', 'isInJunctionList', 'isIsolated',
       'variantSegExpr', 'modifiedExonsCoord', 'originalExonsCoord',
       'vertexIdx', 'kmerType']


cols_pep_file = {col:  'gtex_' + col for col in cols_pep_file}


In [15]:
len(filt_meta_pb['batch'].unique()) # Now 121 brca, # 2792 OV

2792

In [16]:
filt_meta_pb.head()

Unnamed: 0,kmer,gtexCohortfilter >0.0,gtexCohortfilter >=1.0,gtexCohortfilter >=2.0,gtexCohortfilter >=3.0,gtexCohortfilter >=5.0,gtexCohortfilter >=10.0,coord,junctionAnnotated,readFrameAnnotated,...,cancerCohortfilter >=5.0,cancerCohortfilter >=10.0,TCGA25131901A01R156513all,TCGA25131301A01R156513all,TCGA61200801A02R156813all,TCGA24143101A01R156613all,TCGA24229801A01R156913all,isAnnotated,strand,junction_coordinate
0,RMREATKGP,,,,,,,55350885:55350897:55352540:55352555:None:None,False,True,...,2,2,10.91096,0.0,0.0,0.0,0.0,,+,55350897:55352540
1,QVERMREAT,,,,,,,55350876:55350897:55352540:55352546:None:None,False,True,...,2,2,10.91096,0.0,0.0,0.0,0.0,,+,55350897:55352540
2,VERMREATK,,,,,,,55350879:55350897:55352540:55352549:None:None,False,True,...,2,2,10.91096,0.0,0.0,0.0,0.0,,+,55350897:55352540
3,MREATKGPV,,,,,,,55350888:55350897:55352540:55352558:None:None,False,True,...,2,2,10.91096,0.0,0.0,0.0,0.0,,+,55350897:55352540
4,VQVERMREA,,,,,,,55350873:55350897:55352540:55352543:None:None,False,True,...,2,2,10.91096,0.0,0.0,0.0,0.0,,+,55350897:55352540


### Make False positive hypothesis

In [17]:
gtex_dir = '/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref'
check_hypothesis = []
kmers_not_in_gtex = []
kmers_no_metadata = []

for batch_i, batch in enumerate(filt_meta_pb['batch'].unique()):
    print(f'\n Iteration {batch_i} batch {batch}')
    # CANCER SIDE GET BATCH Junctions
    batch_kmers_init = filt_meta_pb.loc[filt_meta_pb['batch'] == batch]
    print(f'Size cancer kmers-junctions {len(batch_kmers_init)}')
    
    # Read GTEX peptide file
    pep_path = os.path.join(gtex_dir, 'cohort_mutNone', f'tmp_out_ref_batch_{batch}', 'ref_sample_peptides_meta.gz')
    if os.path.exists(pep_path):
        ##### COPY 
        not_in_background = False


        print(pep_path)
        meta_pep = pd.read_csv(pep_path, sep = '\t')
        print(meta_pep.shape)
        meta_pep.columns = cols_correct # ISSUE WITH IMMUNOPEPPER



        # Only bi or tri-exons peptides
        jx_ids = [idx for idx, coord in enumerate(meta_pep['modifiedExonsCoord']) if 'nan' not in coord]
        meta_pep = meta_pep.iloc[jx_ids]

        # Quick Assess Cancer junction presence in GTEX
        coord_int = [i.split(':') for i in batch_kmers_init['junction_coordinate'].unique()]
        keep = set()
        for cd in coord_int:
            keep.update([idx for idx, coord in enumerate(meta_pep['modifiedExonsCoord']) \
                         if (cd[0] in coord) and (cd[1] in coord)])
        meta_pep = meta_pep.iloc[list(keep)]

        if keep: # Some target junctions are in "potentially" found in gtex 
            # Rename
            meta_pep = meta_pep.rename(cols_pep_file, axis = 1)

            # Extract coordinates peptides
            meta_pep = get_junction_coordinates(meta_pep, 'gtex_modifiedExonsCoord', sep=';')

            # Add Peptide info 
            meta_pep = meta_pep[list(cols_pep_file.values()) + ['junction_coordinate']].drop_duplicates()
            
            exist_jx_not_in_gtex = set(batch_kmers_init['junction_coordinate']).difference(set(meta_pep['junction_coordinate']))
            exist_jx_in_gtex = set(batch_kmers_init['junction_coordinate']).intersection(set(meta_pep['junction_coordinate']))

            if exist_jx_in_gtex: # Some target junctions are in gtex
                batch_kmers = batch_kmers_init.merge(meta_pep, on = 'junction_coordinate', how = 'inner')
                print(f'Size cancer kmers-junctions + all GTEX peptides {len(batch_kmers)}')

                # Calculate the number of aa fitting on each side of the junctions
                batch_kmers['gtex_aa_E1'] = batch_kmers['gtex_modifiedExonsCoord'].str.split(';').map(lambda x: abs(int(x[0]) - int(x[1]))/3 )
                batch_kmers['gtex_aa_E2'] = batch_kmers['gtex_modifiedExonsCoord'].str.split(';').map(lambda x: abs(int(x[2]) - int(x[3]))/3 )
                batch_kmers['cancer_aa_E1'] = batch_kmers['coord'].str.split(':').map(lambda x: abs(int(x[0]) - int(x[1]))/3 )
                batch_kmers['cancer_aa_E2'] = batch_kmers['coord'].str.split(':').map(lambda x: abs(int(x[2]) - int(x[3]))/3 )
                # What about 3 exons?

                # Validate hypothesis: 
                # H1: The second exon is not long enough to get the translation through in GTEX. BUT no exon was added on the right
                new_col = 'gtexE2<cancE2'
                batch_kmers[new_col] = True
                batch_kmers.loc[batch_kmers['gtex_aa_E2'] > batch_kmers['cancer_aa_E2'], new_col] = False

                res = batch_kmers[['kmer', new_col, 'junction_coordinate' ]].drop_duplicates()
                display( batch_kmers[['kmer', 'cancerCohortfilter >0.0',\
                                      sample_target.replace('-', '') + 'all',
                                      'readFrameAnnotated', \
                                      'junctionAnnotated', \
                                      new_col ]].drop_duplicates() )
                check_hypothesis.append(res[['kmer', 'junction_coordinate']])
                
            if exist_jx_not_in_gtex: # Some target junctions are NOT in gtex
                not_in_background = True
                diff = batch_kmers_init.set_index('junction_coordinate').loc[exist_jx_not_in_gtex].reset_index() #left anti join

        
        else: # No target junctions are in GTEX at all
            not_in_background = True
            diff = batch_kmers_init.copy()

        if not_in_background:
            #diff = diff[['kmer', 'junction_coordinate', 'cancerCohortfilter >0.0', ]].drop_duplicates()
            kmers_not_in_gtex.append(diff[['kmer', 'junction_coordinate']])
            print(f'{diff.shape[0]} Kmers - junctions not found in gtex. Recurrence is:')
            print(diff['cancerCohortfilter >0.0'].unique())
            for rec in diff['cancerCohortfilter >0.0'].unique():
                if rec > 100: 
                    display( diff[['kmer', 'junction_coordinate', 'coord', \
                                   'cancerCohortfilter >0.0',\
                                      sample_target.replace('-', '') + 'all',
                                      'readFrameAnnotated', \
                                      'junctionAnnotated' ]].drop_duplicates() )
            print(f'{diff.shape[0]} Kmers - junctions not found in gtex. Junction annotated is:')
            print(diff['junctionAnnotated'].unique())
            print(f'{diff.shape[0]} Kmers - junctions not found in gtex. RF annotated is:')
            print(diff['readFrameAnnotated'].unique())

            
        ##### END COPY 
    else:
        check_path = os.path.join(gtex_dir, 'cohort_mutNone', f'tmp_out_ref_batch_{batch}')
        kmers_no_metadata.append(batch_kmers_init[['kmer', 'junction_coordinate']])
        for rec in diff['cancerCohortfilter >0.0'].unique():
            if rec > 1000: 
                display(batch_kmers_init[['kmer', 'junction_coordinate']])
        print(f'CHECK COMPLETION OF {check_path}')
        continue
#         for pep_path in glob.glob(pep_path + '/*'):
           




 Iteration 0 batch 16543
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_16543/ref_sample_peptides_meta.gz
(15185, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[2]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 1 batch 12597
Size cancer kmers-junctions 4
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_12597/ref_sample_peptides_meta.gz
(31021, 20)
4 Kmers - junctions not found in gtex. Recurrence is:
[4]
4 Kmers - junctions not found in gtex. Junction annotated is:
[False]
4 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 2 batch 41047
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generati

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,SRIPCPSQL,33,5.875132,False,False,True
9,RIPCPSQLP,33,5.875132,False,False,True
18,GSPAPHSCR,33,5.875132,False,False,True
27,IPCPSQLPA,33,5.875132,False,False,True
36,SRIPCPSQN,143,5.875132,False,True,True
81,IPCPSQNTS,143,5.875132,False,True,True
126,PGSPAPHRI,143,5.875132,False,True,True
171,GSPAPHRIH,143,5.875132,False,True,True
216,RIPCPSQNT,143,5.875132,False,True,True
261,IPCPSQEKV,20,1.678609,False,False,True


87 Kmers - junctions not found in gtex. Recurrence is:
[ 5 12 24 16  4 26 89  8]
87 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
87 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 4 batch 416
Size cancer kmers-junctions 19
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_416/ref_sample_peptides_meta.gz
(33104, 20)
19 Kmers - junctions not found in gtex. Recurrence is:
[17 14  4]
19 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
19 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 5 batch 27568
Size cancer kmers-junctions 122
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27568/ref_sample_peptides_meta.gz
(11126, 20)
122 Kmers - junctions not found in gtex. Recurrence is:
[88 20 12 28 44 42

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MLRKERELR,35697806:35697872,35697872:35697895:35697802:35697806:None:None,10,0.839305,False,False
1,KERELRDEH,35697806:35697872,35697872:35697886:35697793:35697806:None:None,10,0.839305,False,False
2,LRKERELRD,35697806:35697872,35697872:35697892:35697799:35697806:None:None,10,0.839305,False,False
3,RKERELRDE,35697806:35697872,35697872:35697889:35697796:35697806:None:None,10,0.839305,False,False
4,LQKPSSSRL,35699382:35699443,35699443:35699458:35699370:35699382:None:None,8,0.839305,False,False
...,...,...,...,...,...,...,...
133,MLHRFLILS,35697441:35697888,35697888:35697895:35697421:35697441:None:None,18,0.839305,False,False
134,EMLHRFLIL,35697441:35697888,35697888:35697898:35697424:35697441:None:None,18,0.839305,False,False
135,QEEMLHRFL,35697441:35697888,35697888:35697904:35697430:35697441:None:None,18,0.839305,False,False
136,STVLRISRW,35720121:35720206,35720206:35720218:35720106:35720121:None:None,6,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MLRKERELR,35697806:35697872,35697872:35697895:35697802:35697806:None:None,10,0.839305,False,False
1,KERELRDEH,35697806:35697872,35697872:35697886:35697793:35697806:None:None,10,0.839305,False,False
2,LRKERELRD,35697806:35697872,35697872:35697892:35697799:35697806:None:None,10,0.839305,False,False
3,RKERELRDE,35697806:35697872,35697872:35697889:35697796:35697806:None:None,10,0.839305,False,False
4,LQKPSSSRL,35699382:35699443,35699443:35699458:35699370:35699382:None:None,8,0.839305,False,False
...,...,...,...,...,...,...,...
133,MLHRFLILS,35697441:35697888,35697888:35697895:35697421:35697441:None:None,18,0.839305,False,False
134,EMLHRFLIL,35697441:35697888,35697888:35697898:35697424:35697441:None:None,18,0.839305,False,False
135,QEEMLHRFL,35697441:35697888,35697888:35697904:35697430:35697441:None:None,18,0.839305,False,False
136,STVLRISRW,35720121:35720206,35720206:35720218:35720106:35720121:None:None,6,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MLRKERELR,35697806:35697872,35697872:35697895:35697802:35697806:None:None,10,0.839305,False,False
1,KERELRDEH,35697806:35697872,35697872:35697886:35697793:35697806:None:None,10,0.839305,False,False
2,LRKERELRD,35697806:35697872,35697872:35697892:35697799:35697806:None:None,10,0.839305,False,False
3,RKERELRDE,35697806:35697872,35697872:35697889:35697796:35697806:None:None,10,0.839305,False,False
4,LQKPSSSRL,35699382:35699443,35699443:35699458:35699370:35699382:None:None,8,0.839305,False,False
...,...,...,...,...,...,...,...
133,MLHRFLILS,35697441:35697888,35697888:35697895:35697421:35697441:None:None,18,0.839305,False,False
134,EMLHRFLIL,35697441:35697888,35697888:35697898:35697424:35697441:None:None,18,0.839305,False,False
135,QEEMLHRFL,35697441:35697888,35697888:35697904:35697430:35697441:None:None,18,0.839305,False,False
136,STVLRISRW,35720121:35720206,35720206:35720218:35720106:35720121:None:None,6,0.839305,False,False


138 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
138 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 7 batch 24125
Size cancer kmers-junctions 18
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_24125/ref_sample_peptides_meta.gz
(26762, 20)
18 Kmers - junctions not found in gtex. Recurrence is:
[17]
18 Kmers - junctions not found in gtex. Junction annotated is:
[False]
18 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 8 batch 12257
Size cancer kmers-junctions 77
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_12257/ref_sample_peptides_meta.gz
(47094, 20)
77 Kmers - junctions not found in gtex. Recurrence is:
[ 17  73   4   8  43   7 215  27   6   1]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DWCPTGFKF,49185183:49185309,49185309:49185333:49185180:49185183:None:None,17,167.021623,False,False
1,CPTGFKFDL,49185183:49185309,49185309:49185327:49185174:49185183:None:None,17,167.021623,False,False
2,GFKFDLMYA,49185183:49185309,49185309:49185318:49185165:49185183:None:None,17,167.021623,False,False
3,TGFKFDLMY,49185183:49185309,49185309:49185321:49185168:49185183:None:None,17,167.021623,False,False
4,WCPTGFKFD,49185183:49185309,49185309:49185330:49185177:49185183:None:None,17,167.021623,False,False
...,...,...,...,...,...,...,...
72,MVDNEEVGV,49185066:49185744,49185744:49185759:49185054:49185066:None:None,1,1.678609,False,False
73,FMVDNEEVG,49185066:49185744,49185744:49185762:49185057:49185066:None:None,1,1.678609,False,False
74,NEEVGVDSV,49185066:49185744,49185744:49185750:49185045:49185066:None:None,1,1.678609,False,False
75,CAFMVDNEE,49185066:49185744,49185744:49185768:49185063:49185066:None:None,1,1.678609,False,False


77 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
77 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 9 batch 29635
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29635/ref_sample_peptides_meta.gz
(84319, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[70]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 10 batch 40221
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_40221/ref_sample_peptides_meta.gz
(31821, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[9]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
618,CLDHTPEPG,22002145:22002198,22002140:22002145:22002198:22002220:None:None,101,0.839305,False,False
619,GCCSCLDHT,22002145:22002198,22002128:22002145:22002198:22002208:None:None,101,0.839305,False,False
620,CSCLDHTPE,22002145:22002198,22002134:22002145:22002198:22002214:None:None,101,0.839305,False,False
621,CCSCLDHTP,22002145:22002198,22002131:22002145:22002198:22002211:None:None,101,0.839305,False,False
622,LDHTPEPGE,22002145:22002198,22002143:22002145:22002198:22002223:None:None,101,0.839305,False,False
623,SCLDHTPEP,22002145:22002198,22002137:22002145:22002198:22002217:None:None,101,0.839305,False,False


6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 16 batch 21230
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21230/ref_sample_peptides_meta.gz
(30218, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[10  6]
14 Kmers - junctions not found in gtex. Junction annotated is:
[False]
14 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 17 batch 54115
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_54115/ref_sample_peptides_meta.gz
(2327, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[1]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
716,NPHPALLRW,45112282:45112613,45112613:45112637:45112279:45112282:None:None,132,0.839305,False,False
717,QSEGLQRWV,45120974:45121076,45121076:45121092:45120963:45120974:None:None,14,0.839305,False,False
718,ASLSPAHSP,45112488:45112651,45112651:45112669:45112479:45112488:None:None,5,0.839305,False,False
719,EGHQSEGLQ,45120974:45121076,45121076:45121101:45120972:45120974:None:None,14,0.839305,False,False
720,ALLRWALSS,45112282:45112613,45112613:45112625:45112267:45112282:None:None,132,0.839305,False,False
721,SEGLQRWVR,45120974:45121076,45121076:45121089:45120960:45120974:None:None,14,0.839305,False,False
722,GASLSPAHS,45112488:45112651,45112651:45112672:45112482:45112488:None:None,5,0.839305,False,False
723,DGASLSPAH,45112488:45112651,45112651:45112675:45112485:45112488:None:None,5,0.839305,False,False
724,LLRWALSSP,45112282:45112613,45112613:45112622:45112264:45112282:None:None,132,0.839305,False,False
725,PAHSPGAGL,45112488:45112651,45112651:45112657:45112467:45112488:None:None,5,0.839305,False,False


23 Kmers - junctions not found in gtex. Junction annotated is:
[False]
23 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 22 batch 24313
Size cancer kmers-junctions 442
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_24313/ref_sample_peptides_meta.gz
(29614, 20)
442 Kmers - junctions not found in gtex. Recurrence is:
[ 89  33 164  75 111  13  12  10  26   8  40  24   2  52  23   6  14  53
 108   4  39  37  50   1  48  11  18   9 165 238  45  16  78  27]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PARAHQTAR,41769015:41769432,41769432:41769457:41769013:41769015:None:None,89,1.678609,False,False
1,TARHLQVGW,41769015:41769432,41769432:41769439:41768995:41769015:None:None,89,1.678609,False,False
2,QTARHLQVG,41769015:41769432,41769432:41769442:41768998:41769015:None:None,89,1.678609,False,False
3,HQTARHLQV,41769015:41769432,41769432:41769445:41769001:41769015:None:None,89,1.678609,False,False
4,AHQTARHLQ,41769015:41769432,41769432:41769448:41769004:41769015:None:None,89,1.678609,False,False
...,...,...,...,...,...,...,...
437,ARAHQTALV,41768984:41769432,41769432:41769454:41768979:41768984:None:None,37,1.678609,False,False
438,PARAHQTAL,41768984:41769432,41769432:41769457:41768982:41768984:None:None,37,1.678609,False,False
439,AHQTALVRM,41768984:41769432,41769432:41769448:41768973:41768984:None:None,37,1.678609,False,False
440,DDMDGDYPS,41755826:41756174:41755805:41755432,41756174:41756178:41755805:41755826:41755430:4...,27,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PARAHQTAR,41769015:41769432,41769432:41769457:41769013:41769015:None:None,89,1.678609,False,False
1,TARHLQVGW,41769015:41769432,41769432:41769439:41768995:41769015:None:None,89,1.678609,False,False
2,QTARHLQVG,41769015:41769432,41769432:41769442:41768998:41769015:None:None,89,1.678609,False,False
3,HQTARHLQV,41769015:41769432,41769432:41769445:41769001:41769015:None:None,89,1.678609,False,False
4,AHQTARHLQ,41769015:41769432,41769432:41769448:41769004:41769015:None:None,89,1.678609,False,False
...,...,...,...,...,...,...,...
437,ARAHQTALV,41768984:41769432,41769432:41769454:41768979:41768984:None:None,37,1.678609,False,False
438,PARAHQTAL,41768984:41769432,41769432:41769457:41768982:41768984:None:None,37,1.678609,False,False
439,AHQTALVRM,41768984:41769432,41769432:41769448:41768973:41768984:None:None,37,1.678609,False,False
440,DDMDGDYPS,41755826:41756174:41755805:41755432,41756174:41756178:41755805:41755826:41755430:4...,27,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PARAHQTAR,41769015:41769432,41769432:41769457:41769013:41769015:None:None,89,1.678609,False,False
1,TARHLQVGW,41769015:41769432,41769432:41769439:41768995:41769015:None:None,89,1.678609,False,False
2,QTARHLQVG,41769015:41769432,41769432:41769442:41768998:41769015:None:None,89,1.678609,False,False
3,HQTARHLQV,41769015:41769432,41769432:41769445:41769001:41769015:None:None,89,1.678609,False,False
4,AHQTARHLQ,41769015:41769432,41769432:41769448:41769004:41769015:None:None,89,1.678609,False,False
...,...,...,...,...,...,...,...
437,ARAHQTALV,41768984:41769432,41769432:41769454:41768979:41768984:None:None,37,1.678609,False,False
438,PARAHQTAL,41768984:41769432,41769432:41769457:41768982:41768984:None:None,37,1.678609,False,False
439,AHQTALVRM,41768984:41769432,41769432:41769448:41768973:41768984:None:None,37,1.678609,False,False
440,DDMDGDYPS,41755826:41756174:41755805:41755432,41756174:41756178:41755805:41755826:41755430:4...,27,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PARAHQTAR,41769015:41769432,41769432:41769457:41769013:41769015:None:None,89,1.678609,False,False
1,TARHLQVGW,41769015:41769432,41769432:41769439:41768995:41769015:None:None,89,1.678609,False,False
2,QTARHLQVG,41769015:41769432,41769432:41769442:41768998:41769015:None:None,89,1.678609,False,False
3,HQTARHLQV,41769015:41769432,41769432:41769445:41769001:41769015:None:None,89,1.678609,False,False
4,AHQTARHLQ,41769015:41769432,41769432:41769448:41769004:41769015:None:None,89,1.678609,False,False
...,...,...,...,...,...,...,...
437,ARAHQTALV,41768984:41769432,41769432:41769454:41768979:41768984:None:None,37,1.678609,False,False
438,PARAHQTAL,41768984:41769432,41769432:41769457:41768982:41768984:None:None,37,1.678609,False,False
439,AHQTALVRM,41768984:41769432,41769432:41769448:41768973:41768984:None:None,37,1.678609,False,False
440,DDMDGDYPS,41755826:41756174:41755805:41755432,41756174:41756178:41755805:41755826:41755430:4...,27,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PARAHQTAR,41769015:41769432,41769432:41769457:41769013:41769015:None:None,89,1.678609,False,False
1,TARHLQVGW,41769015:41769432,41769432:41769439:41768995:41769015:None:None,89,1.678609,False,False
2,QTARHLQVG,41769015:41769432,41769432:41769442:41768998:41769015:None:None,89,1.678609,False,False
3,HQTARHLQV,41769015:41769432,41769432:41769445:41769001:41769015:None:None,89,1.678609,False,False
4,AHQTARHLQ,41769015:41769432,41769432:41769448:41769004:41769015:None:None,89,1.678609,False,False
...,...,...,...,...,...,...,...
437,ARAHQTALV,41768984:41769432,41769432:41769454:41768979:41768984:None:None,37,1.678609,False,False
438,PARAHQTAL,41768984:41769432,41769432:41769457:41768982:41768984:None:None,37,1.678609,False,False
439,AHQTALVRM,41768984:41769432,41769432:41769448:41768973:41768984:None:None,37,1.678609,False,False
440,DDMDGDYPS,41755826:41756174:41755805:41755432,41756174:41756178:41755805:41755826:41755430:4...,27,0.839305,False,False


442 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
442 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 23 batch 26541
Size cancer kmers-junctions 13
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_26541/ref_sample_peptides_meta.gz
(43335, 20)
13 Kmers - junctions not found in gtex. Recurrence is:
[15 21]
13 Kmers - junctions not found in gtex. Junction annotated is:
[False]
13 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 24 batch 28631
Size cancer kmers-junctions 160
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_28631/ref_sample_peptides_meta.gz
(7069, 20)
160 Kmers - junctions not found in gtex. Recurrence is:
[ 19  47  63  85 202 117   9 162  57  33  41  68  90  15]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTQHPRECH,38264895:38264984:38264998:38287875,38264892:38264895:38264984:38264998:38287875:3...,19,0.839305,True,True
1,RWYNVTDGN,38283645:38283680:38283696:38283732,38283644:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
2,RRWYNVTDG,38283645:38283680:38283696:38283732,38283641:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
3,GVEGGTMSL,38283645:38283680:38283696:38283732,38283636:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
4,CRRWYNVTD,38283645:38283680:38283696:38283732,38283638:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
...,...,...,...,...,...,...,...
155,PGVEGDGNS,38283646:38283731,38283633:38283646:38283731:38283745:None:None,41,0.839305,True,False
156,SKVTETAII,38283646:38283731,38283640:38283646:38283731:38283752:None:None,41,0.839305,False,False
157,EGDGNSNNY,38283646:38283731,38283642:38283646:38283731:38283754:None:None,41,0.839305,True,False
158,FCLVSKVTE,38283646:38283731,38283628:38283646:38283731:38283740:None:None,41,0.839305,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTQHPRECH,38264895:38264984:38264998:38287875,38264892:38264895:38264984:38264998:38287875:3...,19,0.839305,True,True
1,RWYNVTDGN,38283645:38283680:38283696:38283732,38283644:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
2,RRWYNVTDG,38283645:38283680:38283696:38283732,38283641:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
3,GVEGGTMSL,38283645:38283680:38283696:38283732,38283636:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
4,CRRWYNVTD,38283645:38283680:38283696:38283732,38283638:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
...,...,...,...,...,...,...,...
155,PGVEGDGNS,38283646:38283731,38283633:38283646:38283731:38283745:None:None,41,0.839305,True,False
156,SKVTETAII,38283646:38283731,38283640:38283646:38283731:38283752:None:None,41,0.839305,False,False
157,EGDGNSNNY,38283646:38283731,38283642:38283646:38283731:38283754:None:None,41,0.839305,True,False
158,FCLVSKVTE,38283646:38283731,38283628:38283646:38283731:38283740:None:None,41,0.839305,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTQHPRECH,38264895:38264984:38264998:38287875,38264892:38264895:38264984:38264998:38287875:3...,19,0.839305,True,True
1,RWYNVTDGN,38283645:38283680:38283696:38283732,38283644:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
2,RRWYNVTDG,38283645:38283680:38283696:38283732,38283641:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
3,GVEGGTMSL,38283645:38283680:38283696:38283732,38283636:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
4,CRRWYNVTD,38283645:38283680:38283696:38283732,38283638:38283645:38283680:38283696:38283732:3...,47,0.839305,False,False
...,...,...,...,...,...,...,...
155,PGVEGDGNS,38283646:38283731,38283633:38283646:38283731:38283745:None:None,41,0.839305,True,False
156,SKVTETAII,38283646:38283731,38283640:38283646:38283731:38283752:None:None,41,0.839305,False,False
157,EGDGNSNNY,38283646:38283731,38283642:38283646:38283731:38283754:None:None,41,0.839305,True,False
158,FCLVSKVTE,38283646:38283731,38283628:38283646:38283731:38283740:None:None,41,0.839305,True,False


160 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
160 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 25 batch 25644
Size cancer kmers-junctions 23
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_25644/ref_sample_peptides_meta.gz
(31478, 20)
23 Kmers - junctions not found in gtex. Recurrence is:
[ 3 24]
23 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
23 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 26 batch 37596
Size cancer kmers-junctions 11
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_37596/ref_sample_peptides_meta.gz
(21144, 20)
11 Kmers - junctions not found in gtex. Recurrence is:
[2 1]
11 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
11 Kmer

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GIPPTPLTF,130470845:130476919,130470839:130470845:130476919:130476940:None:None,4,0.839305,False,False
1,IPPTPLTFS,130470845:130476919,130470842:130470845:130476919:130476943:None:None,4,0.839305,False,False
2,HGIPPTPLT,130470845:130476919,130470836:130470845:130476919:130476937:None:None,4,0.839305,False,False
3,QHGIPPTPL,130470845:130476919,130470833:130470845:130476919:130476934:None:None,4,0.839305,False,False
4,SGIPKAPNT,130458547:130470838:130470846:130476913,130458545:130458547:130470838:130470846:130476...,3,0.839305,False,False
...,...,...,...,...,...,...,...
542,GEPQEPLHW,130471515:130480421,130471502:130471515:130480421:130480435:None:None,105,0.839305,False,False
543,PKNRFIGMK,130471515:130480421,130471509:130471515:130480421:130480442:None:None,105,0.839305,False,False
544,NPKNRFIGM,130471515:130480421,130471506:130471515:130480421:130480439:None:None,105,0.839305,False,False
545,SWRTPRTAS,130471515:130480421,130471498:130471515:130480421:130480431:None:None,105,0.839305,False,False


547 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
547 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 31 batch 25398
Size cancer kmers-junctions 116
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_25398/ref_sample_peptides_meta.gz
(64019, 20)
116 Kmers - junctions not found in gtex. Recurrence is:
[22 47 18  1 50 17 26 44 34]
116 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
116 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 32 batch 940
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_940/ref_sample_peptides_meta.gz
(108708, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[4]
6 Kmers - junctions not found in gtex. Junction annotated is

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,AARCVPTWG,143576770:143577135,143577135:143577143:143576751:143576770:None:None,24,3.357219,False,False
1,PAARCVPTW,143576770:143577135,143577135:143577146:143576754:143576770:None:None,24,3.357219,False,False
2,ARCVPTWGW,143576770:143577135,143577135:143577140:143576748:143576770:None:None,24,3.357219,False,False
3,QLRGVCPPG,143576770:143577135,143577135:143577145:143576753:143576770:None:None,24,3.357219,False,False
4,LRGVCPPGA,143576770:143577135,143577135:143577142:143576750:143576770:None:None,24,3.357219,False,False
...,...,...,...,...,...,...,...
69,PLPSLVNSL,143574869:143575068,143575068:143575084:143574858:143574869:None:None,21,0.839305,False,False
70,PSLVNSLCA,143574869:143575068,143575068:143575078:143574852:143574869:None:None,21,0.839305,False,False
71,SLVNSLCAG,143574869:143575068,143575068:143575075:143574849:143574869:None:None,21,0.839305,False,False
72,LCEPLPSLV,143574869:143575068,143575068:143575093:143574867:143574869:None:None,21,0.839305,False,False


74 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
74 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 39 batch 9738
Size cancer kmers-junctions 10
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9738/ref_sample_peptides_meta.gz
(25235, 20)
10 Kmers - junctions not found in gtex. Recurrence is:
[16]
10 Kmers - junctions not found in gtex. Junction annotated is:
[False]
10 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 40 batch 27698
Size cancer kmers-junctions 43
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27698/ref_sample_peptides_meta.gz
(1167, 20)
43 Kmers - junctions not found in gtex. Recurrence is:
[113  13   3  27 165  10   7]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
2251,LKAPGLPSG,12791924:12792360,12791918:12791924:12792360:12792381:None:None,113,0.839305,True,False
2252,EDKVKTLKP,12792683:12793135,12792659:12792683:12793135:12793138:None:None,13,21.821921,False,False
2253,PRGGGSGGP,12792123:12792731,12792104:12792123:12792731:12792739:None:None,3,78.055331,True,False
2254,TLKPAGTLG,12792683:12793135,12792674:12792683:12793135:12793153:None:None,13,21.821921,False,False
2255,VKTLKPAGT,12792683:12793135,12792668:12792683:12793135:12793147:None:None,13,21.821921,True,False
2256,GGSYFSGGL,12791982:12792167,12791963:12791982:12792167:12792175:None:None,27,51.197583,True,False
2257,SLKAPGLPS,12791924:12792360,12791915:12791924:12792360:12792378:None:None,113,0.839305,True,False
2258,VKTLKPAGT,12792683:12793135,12792668:12792683:12793135:12793147:None:None,13,21.821921,False,False
2259,GPEGGGGGV,12791961:12792144,12791945:12791961:12792144:12792155:None:None,165,35.250795,True,False
2260,SGGLRRRLC,12791982:12792167,12791978:12791982:12792167:12792190:None:None,27,51.197583,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
2251,LKAPGLPSG,12791924:12792360,12791918:12791924:12792360:12792381:None:None,113,0.839305,True,False
2252,EDKVKTLKP,12792683:12793135,12792659:12792683:12793135:12793138:None:None,13,21.821921,False,False
2253,PRGGGSGGP,12792123:12792731,12792104:12792123:12792731:12792739:None:None,3,78.055331,True,False
2254,TLKPAGTLG,12792683:12793135,12792674:12792683:12793135:12793153:None:None,13,21.821921,False,False
2255,VKTLKPAGT,12792683:12793135,12792668:12792683:12793135:12793147:None:None,13,21.821921,True,False
2256,GGSYFSGGL,12791982:12792167,12791963:12791982:12792167:12792175:None:None,27,51.197583,True,False
2257,SLKAPGLPS,12791924:12792360,12791915:12791924:12792360:12792378:None:None,113,0.839305,True,False
2258,VKTLKPAGT,12792683:12793135,12792668:12792683:12793135:12793147:None:None,13,21.821921,False,False
2259,GPEGGGGGV,12791961:12792144,12791945:12791961:12792144:12792155:None:None,165,35.250795,True,False
2260,SGGLRRRLC,12791982:12792167,12791978:12791982:12792167:12792190:None:None,27,51.197583,True,False


43 Kmers - junctions not found in gtex. Junction annotated is:
[False]
43 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 41 batch 33253
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_33253/ref_sample_peptides_meta.gz
(365194, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[2]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
8 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 42 batch 58706
Size cancer kmers-junctions 53
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_58706/ref_sample_peptides_meta.gz
(1042053, 20)
Size cancer kmers-junctions + all GTEX peptides 25


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,IFLRTGDEV,5,0.839305,False,False,True
5,IIFLRTGDE,5,0.839305,False,False,True
10,FLRTGDEVK,5,0.839305,False,False,True
15,RTGDEVKKI,5,0.839305,False,False,True
20,LRTGDEVKK,5,0.839305,False,False,True


48 Kmers - junctions not found in gtex. Recurrence is:
[312  99  31  91  29  13]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,NRERQKTGG,72272705:72273322,72273322:72273333:72272689:72272705:None:None,312,1.678609,False,False
1,GVITNRERQ,72272705:72273322,72273322:72273345:72272701:72272705:None:None,312,1.678609,False,False
2,VITNRERQK,72272705:72273322,72273322:72273342:72272698:72272705:None:None,312,1.678609,False,False
3,ITNRERQKT,72272705:72273322,72273322:72273339:72272695:72272705:None:None,312,1.678609,False,False
4,TNRERQKTG,72272705:72273322,72273322:72273336:72272692:72272705:None:None,312,1.678609,False,False
5,FIKFDTGTL,72273322:72273800,72273800:72273819:72273314:72273322:None:None,99,506.100696,False,False
6,KFDTGTLDL,72273322:72273800,72273800:72273813:72273308:72273322:None:None,99,506.100696,False,False
7,TGTLDLLTW,72273322:72273800,72273800:72273804:72273299:72273322:None:None,99,506.100696,False,False
8,FDTGTLDLL,72273322:72273800,72273800:72273810:72273305:72273322:None:None,99,506.100696,False,False
9,DFIKFDTGT,72273322:72273800,72273800:72273822:72273317:72273322:None:None,99,506.100696,False,False


48 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
48 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 43 batch 12424
Size cancer kmers-junctions 3
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_12424/ref_sample_peptides_meta.gz
(85800, 20)
Size cancer kmers-junctions + all GTEX peptides 142


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,QTGRSQIGM,134,1.678609,False,False,True


2 Kmers - junctions not found in gtex. Recurrence is:
[66]
2 Kmers - junctions not found in gtex. Junction annotated is:
[False]
2 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 44 batch 50540
Size cancer kmers-junctions 128
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_50540/ref_sample_peptides_meta.gz
(34408, 20)
128 Kmers - junctions not found in gtex. Recurrence is:
[ 38  18  73 164 153   3  27  97 181   7 166   9   5  66  86]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DPTIDGVRF,44116127:44116226:44116114:44115893,44116226:44116236:44116114:44116127:44115889:4...,38,0.839305,False,True
1,PTIDGVRFL,44116127:44116226:44116114:44115893,44116226:44116233:44116114:44116127:44115886:4...,38,0.839305,False,True
2,FDPTIDGVR,44116127:44116226:44116114:44115893,44116226:44116239:44116114:44116127:44115892:4...,38,0.839305,False,True
3,EVSEEQGFY,44116903:44117942:44116900:44116832,44117942:44117957:44116900:44116903:44116823:4...,18,0.839305,False,False
4,REVSEEQGF,44116903:44117942:44116900:44116832,44117942:44117960:44116900:44116903:44116826:4...,18,0.839305,False,False
...,...,...,...,...,...,...,...
123,LKGSQRGGC,44116483:44117648,44117648:44117655:44116463:44116483:None:None,86,0.839305,False,False
124,QRIKLKGSQ,44116483:44117648,44117648:44117667:44116475:44116483:None:None,86,0.839305,False,False
125,LQRIKLKGS,44116483:44117648,44117648:44117670:44116478:44116483:None:None,86,0.839305,False,False
126,IKLKGSQRG,44116483:44117648,44117648:44117661:44116469:44116483:None:None,86,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DPTIDGVRF,44116127:44116226:44116114:44115893,44116226:44116236:44116114:44116127:44115889:4...,38,0.839305,False,True
1,PTIDGVRFL,44116127:44116226:44116114:44115893,44116226:44116233:44116114:44116127:44115886:4...,38,0.839305,False,True
2,FDPTIDGVR,44116127:44116226:44116114:44115893,44116226:44116239:44116114:44116127:44115892:4...,38,0.839305,False,True
3,EVSEEQGFY,44116903:44117942:44116900:44116832,44117942:44117957:44116900:44116903:44116823:4...,18,0.839305,False,False
4,REVSEEQGF,44116903:44117942:44116900:44116832,44117942:44117960:44116900:44116903:44116826:4...,18,0.839305,False,False
...,...,...,...,...,...,...,...
123,LKGSQRGGC,44116483:44117648,44117648:44117655:44116463:44116483:None:None,86,0.839305,False,False
124,QRIKLKGSQ,44116483:44117648,44117648:44117667:44116475:44116483:None:None,86,0.839305,False,False
125,LQRIKLKGS,44116483:44117648,44117648:44117670:44116478:44116483:None:None,86,0.839305,False,False
126,IKLKGSQRG,44116483:44117648,44117648:44117661:44116469:44116483:None:None,86,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DPTIDGVRF,44116127:44116226:44116114:44115893,44116226:44116236:44116114:44116127:44115889:4...,38,0.839305,False,True
1,PTIDGVRFL,44116127:44116226:44116114:44115893,44116226:44116233:44116114:44116127:44115886:4...,38,0.839305,False,True
2,FDPTIDGVR,44116127:44116226:44116114:44115893,44116226:44116239:44116114:44116127:44115892:4...,38,0.839305,False,True
3,EVSEEQGFY,44116903:44117942:44116900:44116832,44117942:44117957:44116900:44116903:44116823:4...,18,0.839305,False,False
4,REVSEEQGF,44116903:44117942:44116900:44116832,44117942:44117960:44116900:44116903:44116826:4...,18,0.839305,False,False
...,...,...,...,...,...,...,...
123,LKGSQRGGC,44116483:44117648,44117648:44117655:44116463:44116483:None:None,86,0.839305,False,False
124,QRIKLKGSQ,44116483:44117648,44117648:44117667:44116475:44116483:None:None,86,0.839305,False,False
125,LQRIKLKGS,44116483:44117648,44117648:44117670:44116478:44116483:None:None,86,0.839305,False,False
126,IKLKGSQRG,44116483:44117648,44117648:44117661:44116469:44116483:None:None,86,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DPTIDGVRF,44116127:44116226:44116114:44115893,44116226:44116236:44116114:44116127:44115889:4...,38,0.839305,False,True
1,PTIDGVRFL,44116127:44116226:44116114:44115893,44116226:44116233:44116114:44116127:44115886:4...,38,0.839305,False,True
2,FDPTIDGVR,44116127:44116226:44116114:44115893,44116226:44116239:44116114:44116127:44115892:4...,38,0.839305,False,True
3,EVSEEQGFY,44116903:44117942:44116900:44116832,44117942:44117957:44116900:44116903:44116823:4...,18,0.839305,False,False
4,REVSEEQGF,44116903:44117942:44116900:44116832,44117942:44117960:44116900:44116903:44116826:4...,18,0.839305,False,False
...,...,...,...,...,...,...,...
123,LKGSQRGGC,44116483:44117648,44117648:44117655:44116463:44116483:None:None,86,0.839305,False,False
124,QRIKLKGSQ,44116483:44117648,44117648:44117667:44116475:44116483:None:None,86,0.839305,False,False
125,LQRIKLKGS,44116483:44117648,44117648:44117670:44116478:44116483:None:None,86,0.839305,False,False
126,IKLKGSQRG,44116483:44117648,44117648:44117661:44116469:44116483:None:None,86,0.839305,False,False


128 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
128 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 45 batch 7880
Size cancer kmers-junctions 62
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_7880/ref_sample_peptides_meta.gz
(39387, 20)
62 Kmers - junctions not found in gtex. Recurrence is:
[ 76  79  51 155  74 186  40 131]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PGLALMHPW,883303:893426:883282:869887,893426:893430:883282:883303:869885:869887,76,1.678609,False,False
1,GLALMHPWP,883303:893426:883282:869887,893426:893427:883282:883303:869882:869887,76,1.678609,False,False
2,GPGPNAPLA,883303:893426:883282:869887,893426:893428:883282:883303:869883:869887,76,1.678609,False,False
3,EMFEVWNRL,899371:902225:899361:893469,902225:902237:899361:899371:893464:893469,79,1.678609,False,False
4,EVWNRLLAL,899371:902225:899361:893469,902225:902228:899361:899371:893455:893469,79,1.678609,False,False
...,...,...,...,...,...,...,...
57,ALMHPWPGL,869887:883282,883282:883298:869876:869887:None:None,131,1.678609,False,False
58,PNAPLAWTT,869887:883282,883282:883296:869874:869887:None:None,131,1.678609,False,False
59,QPGPNAPLA,883303:884067:883282:869887,884067:884069:883282:883303:869883:869887,131,1.678609,False,True
60,EVWNQLLAL,899371:902225:899354:893462,902225:902228:899354:899371:893455:893462,76,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PGLALMHPW,883303:893426:883282:869887,893426:893430:883282:883303:869885:869887,76,1.678609,False,False
1,GLALMHPWP,883303:893426:883282:869887,893426:893427:883282:883303:869882:869887,76,1.678609,False,False
2,GPGPNAPLA,883303:893426:883282:869887,893426:893428:883282:883303:869883:869887,76,1.678609,False,False
3,EMFEVWNRL,899371:902225:899361:893469,902225:902237:899361:899371:893464:893469,79,1.678609,False,False
4,EVWNRLLAL,899371:902225:899361:893469,902225:902228:899361:899371:893455:893469,79,1.678609,False,False
...,...,...,...,...,...,...,...
57,ALMHPWPGL,869887:883282,883282:883298:869876:869887:None:None,131,1.678609,False,False
58,PNAPLAWTT,869887:883282,883282:883296:869874:869887:None:None,131,1.678609,False,False
59,QPGPNAPLA,883303:884067:883282:869887,884067:884069:883282:883303:869883:869887,131,1.678609,False,True
60,EVWNQLLAL,899371:902225:899354:893462,902225:902228:899354:899371:893455:893462,76,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PGLALMHPW,883303:893426:883282:869887,893426:893430:883282:883303:869885:869887,76,1.678609,False,False
1,GLALMHPWP,883303:893426:883282:869887,893426:893427:883282:883303:869882:869887,76,1.678609,False,False
2,GPGPNAPLA,883303:893426:883282:869887,893426:893428:883282:883303:869883:869887,76,1.678609,False,False
3,EMFEVWNRL,899371:902225:899361:893469,902225:902237:899361:899371:893464:893469,79,1.678609,False,False
4,EVWNRLLAL,899371:902225:899361:893469,902225:902228:899361:899371:893455:893469,79,1.678609,False,False
...,...,...,...,...,...,...,...
57,ALMHPWPGL,869887:883282,883282:883298:869876:869887:None:None,131,1.678609,False,False
58,PNAPLAWTT,869887:883282,883282:883296:869874:869887:None:None,131,1.678609,False,False
59,QPGPNAPLA,883303:884067:883282:869887,884067:884069:883282:883303:869883:869887,131,1.678609,False,True
60,EVWNQLLAL,899371:902225:899354:893462,902225:902228:899354:899371:893455:893462,76,0.839305,False,False


62 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
62 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 46 batch 49806
Size cancer kmers-junctions 16
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_49806/ref_sample_peptides_meta.gz
(48463, 20)
16 Kmers - junctions not found in gtex. Recurrence is:
[10]
16 Kmers - junctions not found in gtex. Junction annotated is:
[False]
16 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 47 batch 24773
Size cancer kmers-junctions 28
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_24773/ref_sample_peptides_meta.gz
(52664, 20)
28 Kmers - junctions not found in gtex. Recurrence is:
[ 9  5 11  6  3  8]
28 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
28 Kmers -

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,RSARRSCVG,49107878:49108037:49108057:49108309,49107877:49107878:49108037:49108057:49108309:4...,131,1.678609,False,False
1,RDKERSKDK,49107878:49107962,49107870:49107878:49107962:49107981:None:None,38,1.678609,False,False
2,DKERSKDKD,49107878:49107962,49107873:49107878:49107962:49107984:None:None,38,1.678609,False,False
3,RERDKERSK,49107878:49107962,49107864:49107878:49107962:49107975:None:None,38,1.678609,False,False
4,SETRSGART,49107878:49107962,49107868:49107878:49107962:49107979:None:None,38,1.678609,False,False
5,ERDKERSKD,49107878:49107962,49107867:49107878:49107962:49107978:None:None,38,1.678609,False,False
6,AGSETRSGA,49107878:49107962,49107862:49107878:49107962:49107973:None:None,38,1.678609,False,False
7,ETRSGARTR,49107878:49107962,49107871:49107878:49107962:49107982:None:None,38,1.678609,False,False
8,TRSGARTRT,49107878:49107962,49107874:49107878:49107962:49107985:None:None,38,1.678609,False,False
9,KERSKDKDR,49107878:49107962,49107876:49107878:49107962:49107987:None:None,38,1.678609,False,False


15 Kmers - junctions not found in gtex. Junction annotated is:
[False]
15 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 52 batch 10813
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_10813/ref_sample_peptides_meta.gz
(11485, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[34]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 53 batch 51786
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_51786/ref_sample_peptides_meta.gz
(26814, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[2]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF ann

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KVEAPPHPL,14093237:14093627:14093225:14092918,14093627:14093630:14093225:14093237:14092906:1...,8,0.839305,False,True
1,QRKVEAPPH,14093237:14093627:14093225:14092918,14093627:14093636:14093225:14093237:14092912:1...,8,0.839305,False,True
2,RKVEAPPHP,14093237:14093627:14093225:14092918,14093627:14093633:14093225:14093237:14092909:1...,8,0.839305,False,True
3,YQRKVEAPP,14093237:14093627:14093225:14092918,14093627:14093639:14093225:14093237:14092915:1...,8,0.839305,False,True
4,VEAPPHPLH,14092918:14093225,14093225:14093237:14092903:14092918:None:None,8,0.839305,False,False
5,EAPPHPLHP,14092918:14093225,14093225:14093234:14092900:14092918:None:None,8,0.839305,False,False
6,KFKGPRVPC,14093018:14093202,14093202:14093216:14093005:14093018:None:None,145,1.678609,False,False
7,FIPKFKGPR,14093018:14093202,14093202:14093225:14093014:14093018:None:None,145,1.678609,False,False
8,FKGPRVPCI,14093018:14093202,14093202:14093213:14093002:14093018:None:None,145,1.678609,False,False
9,IPKFKGPRV,14093018:14093202,14093202:14093222:14093011:14093018:None:None,145,1.678609,False,False


42 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
42 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 55 batch 50515
Size cancer kmers-junctions 3
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_50515/ref_sample_peptides_meta.gz
(4642, 20)
3 Kmers - junctions not found in gtex. Recurrence is:
[10]
3 Kmers - junctions not found in gtex. Junction annotated is:
[False]
3 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 56 batch 48303
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_48303/ref_sample_peptides_meta.gz
(28253, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[3]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. R

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,PKSVPQGLR,123,0.839305,False,True,True
623,SVPQGLRPA,123,0.839305,False,True,True
1246,KSVPQGLRP,123,0.839305,False,True,True
1869,TPKSVPQGL,123,0.839305,False,True,True


56 Kmers - junctions not found in gtex. Recurrence is:
[ 66 130   5  87  29  59]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASSSKVYVQ,75985207:75986158,75985192:75985207:75986158:75986170:None:None,66,1.678609,False,False
1,SIASSSKVY,75985207:75986158,75985186:75985207:75986158:75986164:None:None,66,1.678609,False,False
2,SKVYVQHLL,75985207:75986158,75985201:75985207:75986158:75986179:None:None,66,1.678609,False,False
3,SSSKVYVQH,75985207:75986158,75985195:75985207:75986158:75986173:None:None,66,1.678609,False,False
4,SSKVYVQHL,75985207:75986158,75985198:75985207:75986158:75986176:None:None,66,1.678609,False,False
5,IASSSKVYV,75985207:75986158,75985189:75985207:75986158:75986167:None:None,66,1.678609,False,False
6,CCTSWRSST,75984880:75985993,75984872:75984880:75985993:75986012:None:None,130,1.678609,False,False
7,RAGAVPQGR,75984880:75985993,75984879:75984880:75985993:75986019:None:None,130,1.678609,False,False
8,CTSWRSSTG,75984880:75985993,75984875:75984880:75985993:75986015:None:None,130,1.678609,False,False
9,LYELAQFHR,75984880:75985993,75984874:75984880:75985993:75986014:None:None,130,1.678609,False,False


56 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
56 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 64 batch 19838
Size cancer kmers-junctions 26
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_19838/ref_sample_peptides_meta.gz
(50920, 20)
Size cancer kmers-junctions + all GTEX peptides 15


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,AGCLHFPVA,63,0.839305,False,True,True
5,CLHFPVAQG,63,0.839305,False,True,True
10,GCLHFPVAQ,63,0.839305,False,True,True


23 Kmers - junctions not found in gtex. Recurrence is:
[22]
23 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
23 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 65 batch 21155
Size cancer kmers-junctions 11
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21155/ref_sample_peptides_meta.gz
(28998, 20)
11 Kmers - junctions not found in gtex. Recurrence is:
[67]
11 Kmers - junctions not found in gtex. Junction annotated is:
[False]
11 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 66 batch 52454
Size cancer kmers-junctions 11
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_52454/ref_sample_peptides_meta.gz
(11193, 20)
11 Kmers - junctions not found in gtex. Recurrence is:
[11 15]
11 Kmers - junctions not found in

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,FASNIIFSN,192,0.839305,False,False,False
3,INLTFASNI,192,0.839305,False,False,False
6,NLTFASNII,192,0.839305,False,False,False
9,TFASNIIFS,192,0.839305,False,False,False
12,LTFASNIIF,192,0.839305,False,False,False


36 Kmers - junctions not found in gtex. Recurrence is:
[36 27  7  9 52  8]
36 Kmers - junctions not found in gtex. Junction annotated is:
[False]
36 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 70 batch 8851
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_8851/ref_sample_peptides_meta.gz
(3467, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[21]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 71 batch 9464
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9464/ref_sample_peptides_meta.gz
(22366, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[17]
6 Kmers - junctions not found in gtex. Junct

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
3181,PFPCPIVLL,41431753:41432011,41432011:41432021:41431736:41431753:None:None,108,8.393046,False,False
3182,LSMPHSIAG,41431753:41432011,41432011:41432019:41431734:41431753:None:None,108,8.393046,False,False
3183,QPFPCPIVL,41431753:41432011,41432011:41432024:41431739:41431753:None:None,108,8.393046,False,False
3184,PCPIVLLVM,41431753:41432011,41432011:41432015:41431730:41431753:None:None,108,8.393046,False,False
3185,SMPHSIAGY,41431753:41432011,41432011:41432016:41431731:41431753:None:None,108,8.393046,False,False
3186,FPCPIVLLV,41431753:41432011,41432011:41432018:41431733:41431753:None:None,108,8.393046,False,False
3187,CPIVLLVMD,41431753:41432011,41432011:41432012:41431727:41431753:None:None,108,8.393046,False,False
3188,AALSMPHSI,41431753:41432011,41432011:41432025:41431740:41431753:None:None,108,8.393046,False,False
3189,ALSMPHSIA,41431753:41432011,41432011:41432022:41431737:41431753:None:None,108,8.393046,False,False
3190,MPHSIAGYG,41431753:41432011,41432011:41432013:41431728:41431753:None:None,108,8.393046,False,False


10 Kmers - junctions not found in gtex. Junction annotated is:
[False]
10 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 77 batch 25722
Size cancer kmers-junctions 4
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_25722/ref_sample_peptides_meta.gz
(59970, 20)
Size cancer kmers-junctions + all GTEX peptides 52


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,ASENVGRQV,5,3.357219,False,False,True
13,SENVGRQVW,5,3.357219,True,False,True
26,ASENVGRQV,5,3.357219,True,False,True
39,SENVGRQVW,5,3.357219,False,False,True



 Iteration 78 batch 20331
Size cancer kmers-junctions 130
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_20331/ref_sample_peptides_meta.gz
(56379, 20)
130 Kmers - junctions not found in gtex. Recurrence is:
[ 84  13  25  85   7  96  24  41 101  38]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,DSALPINRP,2253968:2255643,2255643:2255657:2253955:2253968:None:None,84,0.839305,False,False
1,SPPDKQATE,2253968:2255643,2255643:2255652:2253950:2253968:None:None,84,0.839305,False,False
2,PPGDSALPI,2253968:2255643,2255643:2255666:2253964:2253968:None:None,84,0.839305,False,False
3,SALPINRPL,2253968:2255643,2255643:2255654:2253952:2253968:None:None,84,0.839305,False,False
4,FSPPDKQAT,2253968:2255643,2255643:2255655:2253953:2253968:None:None,84,0.839305,False,False
...,...,...,...,...,...,...,...
125,QEITATAVP,2253603:2255694,2255694:2255712:2253594:2253603:None:None,38,0.839305,False,False
126,ATAVPLLLL,2253603:2255694,2255694:2255700:2253582:2253603:None:None,38,0.839305,False,False
127,EITATAVPL,2253603:2255694,2255694:2255709:2253591:2253603:None:None,38,0.839305,False,False
128,RSLPPPCPC,2253603:2255694,2255694:2255708:2253590:2253603:None:None,38,0.839305,False,False


130 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
130 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 79 batch 15760
Size cancer kmers-junctions 9
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_15760/ref_sample_peptides_meta.gz
(13610, 20)
9 Kmers - junctions not found in gtex. Recurrence is:
[24 16]
9 Kmers - junctions not found in gtex. Junction annotated is:
[False]
9 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 80 batch 24323
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_24323/ref_sample_peptides_meta.gz
(35592, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[6]
14 Kmers - junctions not found in gtex. Junction annotated is:
[False]
14 Kmers - junctions not 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
3651,PTPSLRPWG,29233407:29243463:29243472:29254202,29233393:29233407:29243463:29243472:29254202:2...,15,0.839305,False,False
3652,KLLSQTEPG,29261079:29261150,29261054:29261079:29261150:29261152:None:None,270,15.107483,False,False
3653,LRPWGRGCT,29233407:29243463:29243472:29254202,29233405:29233407:29243463:29243472:29254202:2...,15,0.839305,False,False
3654,LSNTFTETL,29233407:29243463,29233389:29233407:29243463:29243472:None:None,50,0.839305,False,False
3655,TPSLRPWGR,29233407:29243463:29243472:29254202,29233396:29233407:29243463:29243472:29254202:2...,15,0.839305,False,False
3656,RARSVTQDP,29261079:29261150,29261071:29261079:29261150:29261169:None:None,270,15.107483,False,False
3657,FTETLGEGL,29233407:29243463:29243472:29254202,29233401:29233407:29243463:29243472:29254202:2...,15,0.839305,False,False
3658,VPLGLLAPL,29233631:29234146,29233613:29233631:29234146:29234155:None:None,79,1.678609,False,False
3659,TETLGEGLH,29233407:29243463:29243472:29254202,29233404:29233407:29243463:29243472:29254202:2...,15,0.839305,False,False
3660,CPTPSLRPW,29233407:29243463,29233390:29233407:29243463:29243473:None:None,50,0.839305,False,False


34 Kmers - junctions not found in gtex. Junction annotated is:
[False]
34 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 90 batch 9599
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9599/ref_sample_peptides_meta.gz
(354, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[1]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 91 batch 55868
Size cancer kmers-junctions 13
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_55868/ref_sample_peptides_meta.gz
(42527, 20)
13 Kmers - junctions not found in gtex. Recurrence is:
[80]
13 Kmers - junctions not found in gtex. Junction annotated is:
[False]
13 Kmers - junctions not found in gtex. 

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,LREKAQKCD,364,24.339834,False,True,False
4,CQLREKAQK,364,24.339834,False,True,False
8,QLREKAQKC,364,24.339834,False,True,False



 Iteration 95 batch 34178
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_34178/ref_sample_peptides_meta.gz
(17372, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[5 2]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 96 batch 34270
Size cancer kmers-junctions 26
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_34270/ref_sample_peptides_meta.gz
(9238, 20)
26 Kmers - junctions not found in gtex. Recurrence is:
[24 14 13 18]
26 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
26 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 97 batch 92
Size cancer kmers-junctions 24
/cluster/work/grlab/projects/project

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,FKVAPKTLP,241229639:241230758,241230758:241230764:241229618:241229639:None:None,110,31.893576,False,False
1,MDEFKVAPK,241229639:241230758,241230758:241230773:241229627:241229639:None:None,110,31.893576,False,False
2,IMDEFKVAP,241229639:241230758,241230758:241230776:241229630:241229639:None:None,110,31.893576,False,False
3,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,True,False
4,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,False,False
...,...,...,...,...,...,...,...
305,DVPLDRQQQ,241229850:241230826,241230826:241230842:241229839:241229850:None:None,46,0.839305,False,False
306,QPQTPNDND,241230241:241233819:241230228:241229607,241233819:241233822:241230228:241230241:241229...,45,1.678609,False,False
307,DGNQPQTPN,241230241:241233819:241230228:241229607,241233819:241233831:241230228:241230241:241229...,45,1.678609,False,False
308,GNQPQTPND,241230241:241233819:241230228:241229607,241233819:241233828:241230228:241230241:241229...,45,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,FKVAPKTLP,241229639:241230758,241230758:241230764:241229618:241229639:None:None,110,31.893576,False,False
1,MDEFKVAPK,241229639:241230758,241230758:241230773:241229627:241229639:None:None,110,31.893576,False,False
2,IMDEFKVAP,241229639:241230758,241230758:241230776:241229630:241229639:None:None,110,31.893576,False,False
3,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,True,False
4,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,False,False
...,...,...,...,...,...,...,...
305,DVPLDRQQQ,241229850:241230826,241230826:241230842:241229839:241229850:None:None,46,0.839305,False,False
306,QPQTPNDND,241230241:241233819:241230228:241229607,241233819:241233822:241230228:241230241:241229...,45,1.678609,False,False
307,DGNQPQTPN,241230241:241233819:241230228:241229607,241233819:241233831:241230228:241230241:241229...,45,1.678609,False,False
308,GNQPQTPND,241230241:241233819:241230228:241229607,241233819:241233828:241230228:241230241:241229...,45,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,FKVAPKTLP,241229639:241230758,241230758:241230764:241229618:241229639:None:None,110,31.893576,False,False
1,MDEFKVAPK,241229639:241230758,241230758:241230773:241229627:241229639:None:None,110,31.893576,False,False
2,IMDEFKVAP,241229639:241230758,241230758:241230776:241229630:241229639:None:None,110,31.893576,False,False
3,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,True,False
4,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,False,False
...,...,...,...,...,...,...,...
305,DVPLDRQQQ,241229850:241230826,241230826:241230842:241229839:241229850:None:None,46,0.839305,False,False
306,QPQTPNDND,241230241:241233819:241230228:241229607,241233819:241233822:241230228:241230241:241229...,45,1.678609,False,False
307,DGNQPQTPN,241230241:241233819:241230228:241229607,241233819:241233831:241230228:241230241:241229...,45,1.678609,False,False
308,GNQPQTPND,241230241:241233819:241230228:241229607,241233819:241233828:241230228:241230241:241229...,45,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,FKVAPKTLP,241229639:241230758,241230758:241230764:241229618:241229639:None:None,110,31.893576,False,False
1,MDEFKVAPK,241229639:241230758,241230758:241230773:241229627:241229639:None:None,110,31.893576,False,False
2,IMDEFKVAP,241229639:241230758,241230758:241230776:241229630:241229639:None:None,110,31.893576,False,False
3,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,True,False
4,KVAPKTLPW,241229639:241230758,241230758:241230761:241229615:241229639:None:None,110,31.893576,False,False
...,...,...,...,...,...,...,...
305,DVPLDRQQQ,241229850:241230826,241230826:241230842:241229839:241229850:None:None,46,0.839305,False,False
306,QPQTPNDND,241230241:241233819:241230228:241229607,241233819:241233822:241230228:241230241:241229...,45,1.678609,False,False
307,DGNQPQTPN,241230241:241233819:241230228:241229607,241233819:241233831:241230228:241230241:241229...,45,1.678609,False,False
308,GNQPQTPND,241230241:241233819:241230228:241229607,241233819:241233828:241230228:241230241:241229...,45,1.678609,False,False


310 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
310 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 110 batch 27731
Size cancer kmers-junctions 159
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27731/ref_sample_peptides_meta.gz
(11903, 20)
159 Kmers - junctions not found in gtex. Recurrence is:
[33 22  8  1  4 14 18  7  6 12 10 29  2]
159 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
159 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 111 batch 40736
Size cancer kmers-junctions 33
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_40736/ref_sample_peptides_meta.gz
(60221, 20)
33 Kmers - junctions not found in gtex. Recurrence is:
[  5  91  57 114  14]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PFAPSSTGP,184183493:184183650,184183484:184183493:184183650:184183668:None:None,5,0.839305,False,False
1,APSSTGPGA,184183493:184183650,184183490:184183493:184183650:184183674:None:None,5,0.839305,False,False
2,FAPSSTGPG,184183493:184183650,184183487:184183493:184183650:184183671:None:None,5,0.839305,False,False
3,VPFAPSSTG,184183493:184183650,184183481:184183493:184183650:184183665:None:None,5,0.839305,False,False
4,GYLSGMPEC,184180183:184181129,184180182:184180183:184181129:184181155:None:None,91,0.839305,False,False
5,FGYLSGMPE,184180183:184181129,184180179:184180183:184181129:184181152:None:None,91,0.839305,False,False
6,KSENAIVWK,184181226:184182227:184182248:184182756,184181221:184181226:184182227:184182248:184182...,57,3.357219,False,True
7,RARMPSCGR,184181226:184182227:184182248:184182756,184181222:184181226:184182227:184182248:184182...,57,3.357219,False,True
8,FEVPFAPSS,184182868:184183481:184183493:184183650,184182862:184182868:184183481:184183493:184183...,5,0.839305,False,True
9,EVPFAPSST,184182868:184183481:184183493:184183650,184182865:184182868:184183481:184183493:184183...,5,0.839305,False,True


33 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
33 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 112 batch 21949
Size cancer kmers-junctions 1
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21949/ref_sample_peptides_meta.gz
(9287, 20)
1 Kmers - junctions not found in gtex. Recurrence is:
[54]
1 Kmers - junctions not found in gtex. Junction annotated is:
[ True]
1 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 113 batch 15971
Size cancer kmers-junctions 1
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_15971/ref_sample_peptides_meta.gz
(50198, 20)
1 Kmers - junctions not found in gtex. Recurrence is:
[108]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4381,YEVFEKDPW,22924357:22924655,22924655:22924679:22924354:22924357:None:None,108,0.839305,False,False


1 Kmers - junctions not found in gtex. Junction annotated is:
[False]
1 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 114 batch 49872
Size cancer kmers-junctions 17
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_49872/ref_sample_peptides_meta.gz
(41421, 20)
17 Kmers - junctions not found in gtex. Recurrence is:
[62 12]
17 Kmers - junctions not found in gtex. Junction annotated is:
[False]
17 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 115 batch 29019
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29019/ref_sample_peptides_meta.gz
(25901, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[128]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4399,PLFRSTQGQ,45421263:45421366,45421366:45421387:45421257:45421263:None:None,128,2.517914,False,False
4400,STQGQSPWQ,45421263:45421366,45421366:45421375:45421245:45421263:None:None,128,2.517914,True,False
4401,TQGQSPWQE,45421263:45421366,45421366:45421372:45421242:45421263:None:None,128,2.517914,False,False
4402,TQGQSPWQE,45421263:45421366,45421366:45421372:45421242:45421263:None:None,128,2.517914,True,False
4403,LFRSTQGQS,45421263:45421366,45421366:45421384:45421254:45421263:None:None,128,2.517914,False,False
4404,KPLFRSTQG,45421263:45421366,45421366:45421390:45421260:45421263:None:None,128,2.517914,True,False
4405,LFRSTQGQS,45421263:45421366,45421366:45421384:45421254:45421263:None:None,128,2.517914,True,False
4406,FRSTQGQSP,45421263:45421366,45421366:45421381:45421251:45421263:None:None,128,2.517914,False,False
4407,KPLFRSTQG,45421263:45421366,45421366:45421390:45421260:45421263:None:None,128,2.517914,False,False
4408,RSTQGQSPW,45421263:45421366,45421366:45421378:45421248:45421263:None:None,128,2.517914,False,False


14 Kmers - junctions not found in gtex. Junction annotated is:
[False]
14 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 116 batch 9570
Size cancer kmers-junctions 30
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9570/ref_sample_peptides_meta.gz
(49307, 20)
30 Kmers - junctions not found in gtex. Recurrence is:
[27 58 53 15]
30 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
30 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 117 batch 59891
Size cancer kmers-junctions 24
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_59891/ref_sample_peptides_meta.gz
(27485, 20)
24 Kmers - junctions not found in gtex. Recurrence is:
[  3 142]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4443,PPTQMLQGD,152931776:152959787,152931754:152931776:152959787:152959792:None:None,3,9.232351,False,False
4444,GSERPGRTG,152945306:152959738,152945304:152945306:152959738:152959763:None:None,142,12.58957,False,False
4445,PTGTLQYQA,152917155:152918982,152917149:152917155:152918982:152919003:None:None,3,1.678609,False,False
4446,STRAPTGTL,152917155:152918982,152917137:152917155:152918982:152918991:None:None,3,1.678609,False,False
4447,TRVGEAWQD,152945306:152959738,152945300:152945306:152959738:152959759:None:None,142,12.58957,False,False
4448,TQMLQGDPA,152931776:152959787,152931760:152931776:152959787:152959798:None:None,3,9.232351,False,False
4449,NTDAARRPS,152931776:152959787,152931759:152931776:152959787:152959797:None:None,3,9.232351,False,False
4450,TGTLQYQAQ,152917155:152918982,152917152:152917155:152918982:152919006:None:None,3,1.678609,False,False
4451,APNTDAARR,152931776:152959787,152931753:152931776:152959787:152959791:None:None,3,9.232351,False,False
4452,PSTRAPTGT,152917155:152918982,152917134:152917155:152918982:152918988:None:None,3,1.678609,False,False


24 Kmers - junctions not found in gtex. Junction annotated is:
[False]
24 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 118 batch 27574
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27574/ref_sample_peptides_meta.gz
(18741, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[18]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 119 batch 34260
Size cancer kmers-junctions 23
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_34260/ref_sample_peptides_meta.gz
(30784, 20)
23 Kmers - junctions not found in gtex. Recurrence is:
[ 7 89  2]
23 Kmers - junctions not found in gtex. Junction annotated is:
[False]
23 Kmers - junctions not found in

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,PAAGVTDEA,109,10.071656,False,False,False
4,AAGVTDEAP,109,10.071656,False,False,False
8,NQQQESQMR,109,10.071656,False,False,False
12,IANQQQESQ,109,10.071656,False,False,False
16,QPAAGVTDE,109,10.071656,False,False,False
20,LPTSSRSHR,109,10.071656,False,False,False
24,CQPAAGVTD,109,10.071656,False,False,False
28,QQESQMRPP,109,10.071656,False,False,False
32,VIANQQQES,109,10.071656,False,False,False
36,CHCQPAAGV,109,10.071656,False,False,False


55 Kmers - junctions not found in gtex. Recurrence is:
[ 12  11   1 116]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,AGCMLFPLR,49856992:49857069,49856974:49856992:49857069:49857078:None:None,12,0.839305,False,False
1,FPLRPERLR,49856992:49857069,49856989:49856992:49857069:49857093:None:None,12,0.839305,False,False
2,LFPLRPERL,49856992:49857069,49856986:49856992:49857069:49857090:None:None,12,0.839305,False,False
3,MLFPLRPER,49856992:49857069,49856983:49856992:49857069:49857087:None:None,12,0.839305,False,False
4,GCMLFPLRP,49856992:49857069,49856977:49856992:49857069:49857081:None:None,12,0.839305,False,False
5,CMLFPLRPE,49856992:49857069,49856980:49856992:49857069:49857084:None:None,12,0.839305,False,False
6,LAGCMLFPL,49854888:49856974:49856992:49857069,49854885:49854888:49856974:49856992:49857069:4...,11,0.839305,False,True
7,FAGCMLFPL,49855077:49856974:49856992:49857069,49855074:49855077:49856974:49856992:49857069:4...,12,0.839305,False,True
8,GDMDNGFRR,49858615:49858637:49858653:49860055,49858610:49858615:49858637:49858653:49860055:4...,1,1.678609,False,True
9,PGDMDNGFR,49858615:49858637:49858653:49860055,49858607:49858615:49858637:49858653:49860055:4...,1,1.678609,False,True


55 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
55 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 130 batch 51190
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_51190/ref_sample_peptides_meta.gz
(21704, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[10]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 131 batch 4764
Size cancer kmers-junctions 13
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_4764/ref_sample_peptides_meta.gz
(11357, 20)
13 Kmers - junctions not found in gtex. Recurrence is:
[52  4]
13 Kmers - junctions not found in gtex. Junction annotated is:
[False]
13 Kmers - junctions not found i

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4747,LRMAFQVEP,2722666:2726314,2722647:2722666:2726314:2726322:None:None,125,0.839305,False,False
4748,NEAMLQRKW,2719705:2726351:2726373:2727292,2719701:2719705:2726351:2726373:2727292:2727293,11,0.839305,False,True
4749,VDGEGKGGS,2719700:2723311,2719689:2719700:2723311:2723327:None:None,153,1.678609,False,False
4750,AFQVEPSLA,2722666:2726314,2722656:2722666:2726314:2726331:None:None,125,0.839305,False,False
4751,DAVVDGEGK,2719700:2723311,2719680:2719700:2723311:2723318:None:None,153,1.678609,False,False
4752,RMAFQVEPS,2722666:2726314,2722650:2722666:2726314:2726325:None:None,125,0.839305,False,False
4753,VVDGEGKGG,2719700:2723311,2719686:2719700:2723311:2723324:None:None,153,1.678609,False,False
4754,GVSGGAISS,2722666:2726314,2722655:2722666:2726314:2726330:None:None,125,0.839305,False,False
4755,NEAMLQRKC,2719705:2726351:2726373:2738199,2719701:2719705:2726351:2726373:2738199:2738200,18,0.839305,False,True
4756,VDGENEAML,2719705:2726351,2719689:2719705:2726351:2726362:None:None,18,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4747,LRMAFQVEP,2722666:2726314,2722647:2722666:2726314:2726322:None:None,125,0.839305,False,False
4748,NEAMLQRKW,2719705:2726351:2726373:2727292,2719701:2719705:2726351:2726373:2727292:2727293,11,0.839305,False,True
4749,VDGEGKGGS,2719700:2723311,2719689:2719700:2723311:2723327:None:None,153,1.678609,False,False
4750,AFQVEPSLA,2722666:2726314,2722656:2722666:2726314:2726331:None:None,125,0.839305,False,False
4751,DAVVDGEGK,2719700:2723311,2719680:2719700:2723311:2723318:None:None,153,1.678609,False,False
4752,RMAFQVEPS,2722666:2726314,2722650:2722666:2726314:2726325:None:None,125,0.839305,False,False
4753,VVDGEGKGG,2719700:2723311,2719686:2719700:2723311:2723324:None:None,153,1.678609,False,False
4754,GVSGGAISS,2722666:2726314,2722655:2722666:2726314:2726330:None:None,125,0.839305,False,False
4755,NEAMLQRKC,2719705:2726351:2726373:2738199,2719701:2719705:2726351:2726373:2738199:2738200,18,0.839305,False,True
4756,VDGENEAML,2719705:2726351,2719689:2719705:2726351:2726362:None:None,18,0.839305,False,False


27 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
27 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 134 batch 3836
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_3836/ref_sample_peptides_meta.gz
(15635, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[6]
14 Kmers - junctions not found in gtex. Junction annotated is:
[False]
14 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 135 batch 48569
Size cancer kmers-junctions 94
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_48569/ref_sample_peptides_meta.gz
(18124, 20)
94 Kmers - junctions not found in gtex. Recurrence is:
[ 9 12  8 18 46 33]
94 Kmers - junctions not found in gtex. Junction annotated is:
[False]
94 Kmers - 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,CPGPPLQPV,47853256:47853308,47853308:47853322:47853243:47853256:None:None,54,0.839305,False,False
1,GPPLQPVAS,47853256:47853308,47853308:47853316:47853237:47853256:None:None,54,0.839305,False,False
2,LCPGPPLQP,47853256:47853308,47853308:47853325:47853246:47853256:None:None,54,0.839305,False,False
3,PLCPGPPLQ,47853256:47853308,47853308:47853328:47853249:47853256:None:None,54,0.839305,False,False
4,PGPPLQPVA,47853256:47853308,47853308:47853319:47853240:47853256:None:None,54,0.839305,False,False
5,APLCPGPPL,47853256:47853308,47853308:47853331:47853252:47853256:None:None,54,0.839305,False,False
6,VRSKVSSKC,47857476:47870857,47870857:47870869:47857461:47857476:None:None,15,14.268179,False,False
7,RSKVSSKCG,47857476:47870857,47870857:47870866:47857458:47857476:None:None,15,14.268179,False,False
8,LKNVRSKVS,47857476:47870857,47870857:47870878:47857470:47857476:None:None,15,14.268179,False,False
9,NVRSKVSSK,47857476:47870857,47870857:47870872:47857464:47857476:None:None,15,14.268179,False,False


19 Kmers - junctions not found in gtex. Junction annotated is:
[False]
19 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 139 batch 44979
Size cancer kmers-junctions 16
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_44979/ref_sample_peptides_meta.gz
(421273, 20)
16 Kmers - junctions not found in gtex. Recurrence is:
[185   6]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4914,KCVRVMLLV,82276215:82276469,82276469:82276481:82276200:82276215:None:None,185,960.164504,False,False
4915,RVMLLVIFL,82276215:82276469,82276469:82276472:82276191:82276215:None:None,185,960.164504,False,False
4916,VAGFGRKDQ,82276121:82276215,82276215:82276237:82276116:82276121:None:None,6,29.375662,False,False
4917,CVRVMLLVI,82276215:82276469,82276469:82276478:82276197:82276215:None:None,185,960.164504,False,False
4918,EVCKGHAVG,82276215:82276469,82276469:82276482:82276201:82276215:None:None,185,960.164504,False,False
4919,RKCVRVMLL,82276215:82276469,82276469:82276484:82276203:82276215:None:None,185,960.164504,False,False
4920,FGRKDQDHK,82276121:82276215,82276215:82276228:82276107:82276121:None:None,6,29.375662,False,False
4921,GRKDQDHKY,82276121:82276215,82276215:82276225:82276104:82276121:None:None,6,29.375662,False,False
4922,VRVMLLVIF,82276215:82276469,82276469:82276475:82276194:82276215:None:None,185,960.164504,False,False
4923,AGFGRKDQD,82276121:82276215,82276215:82276234:82276113:82276121:None:None,6,29.375662,False,False


16 Kmers - junctions not found in gtex. Junction annotated is:
[False]
16 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 140 batch 917
Size cancer kmers-junctions 20
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_917/ref_sample_peptides_meta.gz
(19800, 20)
20 Kmers - junctions not found in gtex. Recurrence is:
[ 3 17  4]
20 Kmers - junctions not found in gtex. Junction annotated is:
[False]
20 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 141 batch 20181
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_20181/ref_sample_peptides_meta.gz
(37962, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[19]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gte

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
4999,LSLLLLSSS,49758718:49762924,49758699:49758718:49762924:49762932:None:None,102,0.839305,False,False
5000,SSSPSTPHF,49758718:49762924,49758717:49758718:49762924:49762950:None:None,102,0.839305,False,False
5001,LLSSSPSTP,49758718:49762924,49758711:49758718:49762924:49762944:None:None,102,0.839305,False,False
5002,LSSSPSTPH,49758718:49762924,49758714:49758718:49762924:49762947:None:None,102,0.839305,False,False


4 Kmers - junctions not found in gtex. Junction annotated is:
[False]
4 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 146 batch 634
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_634/ref_sample_peptides_meta.gz
(4906, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[29]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 147 batch 51634
Size cancer kmers-junctions 5
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_51634/ref_sample_peptides_meta.gz
(132440, 20)
5 Kmers - junctions not found in gtex. Recurrence is:
[11]
5 Kmers - junctions not found in gtex. Junction annotated is:
[False]
5 Kmers - junctions not found in gtex. RF annota

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,LQRKFKKDI,83,0.839305,False,False,False



 Iteration 152 batch 33564
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_33564/ref_sample_peptides_meta.gz
(7220, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[1]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 153 batch 49830
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_49830/ref_sample_peptides_meta.gz
(481832, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[3]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 154 batch 19601
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_ge

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,KLKEEEEQL,108,1.678609,False,False,True
35,KTEGGRGAA,108,1.678609,False,False,True
70,KKLKEEEEQ,108,1.678609,False,False,True
105,QKTEGGRGA,108,1.678609,False,False,True
140,LKEEEEQLP,108,1.678609,False,False,True
175,TEGGRGAAS,108,1.678609,False,False,True


164 Kmers - junctions not found in gtex. Recurrence is:
[ 11   6 187 236  52 105   5   9  56 136  20 370]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EGISQLPSG,34244856:34244916,34244845:34244856:34244916:34244932:None:None,11,0.839305,False,False
1,GGHLAASFW,34244856:34244916,34244844:34244856:34244916:34244931:None:None,11,0.839305,False,False
2,RSFLLGLDS,34244856:34244916,34244855:34244856:34244916:34244942:None:None,11,0.839305,False,False
3,RASRSFLLG,34244856:34244916,34244846:34244856:34244916:34244933:None:None,11,0.839305,False,False
4,KRRASRSFL,34244856:34244916,34244840:34244856:34244916:34244927:None:None,11,0.839305,False,False
...,...,...,...,...,...,...,...
159,VEGGRGGHL,34243522:34244833,34243518:34243522:34244833:34244856:None:None,370,67.144371,False,False
160,KKLVEGGRG,34243522:34244833,34243509:34243522:34244833:34244847:None:None,370,67.144371,False,False
161,PKKLVEGGR,34243522:34244833,34243506:34243522:34244833:34244844:None:None,370,67.144371,False,False
162,GRPKKLVEG,34243522:34244833,34243500:34243522:34244833:34244838:None:None,370,67.144371,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EGISQLPSG,34244856:34244916,34244845:34244856:34244916:34244932:None:None,11,0.839305,False,False
1,GGHLAASFW,34244856:34244916,34244844:34244856:34244916:34244931:None:None,11,0.839305,False,False
2,RSFLLGLDS,34244856:34244916,34244855:34244856:34244916:34244942:None:None,11,0.839305,False,False
3,RASRSFLLG,34244856:34244916,34244846:34244856:34244916:34244933:None:None,11,0.839305,False,False
4,KRRASRSFL,34244856:34244916,34244840:34244856:34244916:34244927:None:None,11,0.839305,False,False
...,...,...,...,...,...,...,...
159,VEGGRGGHL,34243522:34244833,34243518:34243522:34244833:34244856:None:None,370,67.144371,False,False
160,KKLVEGGRG,34243522:34244833,34243509:34243522:34244833:34244847:None:None,370,67.144371,False,False
161,PKKLVEGGR,34243522:34244833,34243506:34243522:34244833:34244844:None:None,370,67.144371,False,False
162,GRPKKLVEG,34243522:34244833,34243500:34243522:34244833:34244838:None:None,370,67.144371,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EGISQLPSG,34244856:34244916,34244845:34244856:34244916:34244932:None:None,11,0.839305,False,False
1,GGHLAASFW,34244856:34244916,34244844:34244856:34244916:34244931:None:None,11,0.839305,False,False
2,RSFLLGLDS,34244856:34244916,34244855:34244856:34244916:34244942:None:None,11,0.839305,False,False
3,RASRSFLLG,34244856:34244916,34244846:34244856:34244916:34244933:None:None,11,0.839305,False,False
4,KRRASRSFL,34244856:34244916,34244840:34244856:34244916:34244927:None:None,11,0.839305,False,False
...,...,...,...,...,...,...,...
159,VEGGRGGHL,34243522:34244833,34243518:34243522:34244833:34244856:None:None,370,67.144371,False,False
160,KKLVEGGRG,34243522:34244833,34243509:34243522:34244833:34244847:None:None,370,67.144371,False,False
161,PKKLVEGGR,34243522:34244833,34243506:34243522:34244833:34244844:None:None,370,67.144371,False,False
162,GRPKKLVEG,34243522:34244833,34243500:34243522:34244833:34244838:None:None,370,67.144371,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EGISQLPSG,34244856:34244916,34244845:34244856:34244916:34244932:None:None,11,0.839305,False,False
1,GGHLAASFW,34244856:34244916,34244844:34244856:34244916:34244931:None:None,11,0.839305,False,False
2,RSFLLGLDS,34244856:34244916,34244855:34244856:34244916:34244942:None:None,11,0.839305,False,False
3,RASRSFLLG,34244856:34244916,34244846:34244856:34244916:34244933:None:None,11,0.839305,False,False
4,KRRASRSFL,34244856:34244916,34244840:34244856:34244916:34244927:None:None,11,0.839305,False,False
...,...,...,...,...,...,...,...
159,VEGGRGGHL,34243522:34244833,34243518:34243522:34244833:34244856:None:None,370,67.144371,False,False
160,KKLVEGGRG,34243522:34244833,34243509:34243522:34244833:34244847:None:None,370,67.144371,False,False
161,PKKLVEGGR,34243522:34244833,34243506:34243522:34244833:34244844:None:None,370,67.144371,False,False
162,GRPKKLVEG,34243522:34244833,34243500:34243522:34244833:34244838:None:None,370,67.144371,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EGISQLPSG,34244856:34244916,34244845:34244856:34244916:34244932:None:None,11,0.839305,False,False
1,GGHLAASFW,34244856:34244916,34244844:34244856:34244916:34244931:None:None,11,0.839305,False,False
2,RSFLLGLDS,34244856:34244916,34244855:34244856:34244916:34244942:None:None,11,0.839305,False,False
3,RASRSFLLG,34244856:34244916,34244846:34244856:34244916:34244933:None:None,11,0.839305,False,False
4,KRRASRSFL,34244856:34244916,34244840:34244856:34244916:34244927:None:None,11,0.839305,False,False
...,...,...,...,...,...,...,...
159,VEGGRGGHL,34243522:34244833,34243518:34243522:34244833:34244856:None:None,370,67.144371,False,False
160,KKLVEGGRG,34243522:34244833,34243509:34243522:34244833:34244847:None:None,370,67.144371,False,False
161,PKKLVEGGR,34243522:34244833,34243506:34243522:34244833:34244844:None:None,370,67.144371,False,False
162,GRPKKLVEG,34243522:34244833,34243500:34243522:34244833:34244838:None:None,370,67.144371,False,False


164 Kmers - junctions not found in gtex. Junction annotated is:
[False]
164 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 159 batch 45757
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_45757/ref_sample_peptides_meta.gz
(5222, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[9 4]
14 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
14 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 160 batch 29785
Size cancer kmers-junctions 1
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29785/ref_sample_peptides_meta.gz
(26747, 20)
1 Kmers - junctions not found in gtex. Recurrence is:
[19]
1 Kmers - junctions not found in gtex. Junction annotated is:
[False]
1 Kmers - junctions not found 

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,EASTRSGNT,16,0.839305,False,False,True
1,AGGKHEERK,16,0.839305,False,False,True
2,QEASTRSGN,16,0.839305,False,False,True
3,GGKHEERKY,16,0.839305,False,False,True
4,GAWWRSPAS,24,0.839305,False,True,True
6,AGAWWRSPA,24,0.839305,False,True,True
8,ARGGDHRQA,24,0.839305,False,True,True
10,LARGGDHRQ,24,0.839305,False,True,True
12,GKHEERKYT,16,0.839305,False,True,True
76,GAWWRSPAA,23,0.839305,False,False,True


1898 Kmers - junctions not found in gtex. Recurrence is:
[103  20  79  71  30  63   6  23 183  10 143   9  39   3   4 132 212 162
  41  67  57  27 131  25 156  97  14  37 153  29  19  51 109  16 113 191
  32 123  21  47  64   8  13  40  28  60  53  45  35 119  61  22 134 199
  80  36  85   5 105 222  68  92  43  55   7  98 120  33  73  81 164  62
 121 140  18  26  50  46  66  78   1 168  12  48 126  44 150  93  31 100
  34   2 169  11 171  76  42  69 198]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MTAGACPWM,76302714:76302988,76302712:76302714:76302988:76303013:None:None,103,1.678609,True,False
1,TERRHRVPG,76302721:76302891:76302900:76302999,76302715:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
2,ERRHRVPGC,76302721:76302891:76302900:76302999,76302718:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
3,MTERRHRVP,76302721:76302891:76302900:76302999,76302712:76302721:76302891:76302900:76302999:7...,20,0.839305,True,False
4,WPGYVRPHA,76302878:76304053:76304060:76304160,76302862:76302878:76304053:76304060:76304160:7...,79,0.839305,True,False
...,...,...,...,...,...,...,...
1893,PGRADGASR,76303040:76303842,76303026:76303040:76303842:76303855:None:None,29,0.839305,False,False
1894,RRVSLDVNH,76302724:76302997,76302721:76302724:76302997:76303021:None:None,198,5.035828,True,False
1895,ERRVSLDVN,76302724:76302997,76302718:76302724:76302997:76303018:None:None,198,5.035828,True,False
1896,MTERRVSLD,76302724:76302997,76302712:76302724:76302997:76303012:None:None,198,5.035828,True,False


1898 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
1898 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 174 batch 32918
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_32918/ref_sample_peptides_meta.gz
(30567, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[13]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 175 batch 47320
Size cancer kmers-junctions 27
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_47320/ref_sample_peptides_meta.gz
(36567, 20)
Size cancer kmers-junctions + all GTEX peptides 195


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,GHWGHIEQW,226,4.196523,False,True,True


26 Kmers - junctions not found in gtex. Recurrence is:
[13  2  8 33]
26 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
26 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 176 batch 55783
Size cancer kmers-junctions 49
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_55783/ref_sample_peptides_meta.gz
(27308, 20)
49 Kmers - junctions not found in gtex. Recurrence is:
[ 10  12  35  23 155  57  16]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MDDTCSDEK,35067968:35068049,35068049:35068057:35067949:35067968:None:None,10,0.839305,True,False
1,PKMDDTCSD,35067968:35068049,35068049:35068063:35067955:35067968:None:None,10,0.839305,False,False
2,MDDTCSDEK,35067968:35068049,35068049:35068057:35067949:35067968:None:None,10,0.839305,False,False
3,KMDDTCSDE,35067968:35068049,35068049:35068060:35067952:35067968:None:None,10,0.839305,False,False
4,RAVANETHG,35061179:35063017,35063017:35063023:35061158:35061179:None:None,12,26.857748,False,False
5,AVANETHGH,35061179:35063017,35063017:35063020:35061155:35061179:None:None,12,26.857748,False,False
6,ARAVANETH,35061179:35063017,35063017:35063026:35061161:35061179:None:None,12,26.857748,False,False
7,EIIFIDELD,35062259:35062337,35062337:35062345:35062240:35062259:None:None,35,0.839305,False,False
8,PEIIFIDEL,35062259:35062337,35062337:35062348:35062243:35062259:None:None,35,0.839305,False,False
9,FDELDSIAI,35059530:35059746,35059746:35059771:35059528:35059530:None:None,23,0.839305,False,False


49 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
49 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 177 batch 45841
Size cancer kmers-junctions 27
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_45841/ref_sample_peptides_meta.gz
(14599, 20)
27 Kmers - junctions not found in gtex. Recurrence is:
[76 12  5  8]
27 Kmers - junctions not found in gtex. Junction annotated is:
[False]
27 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 178 batch 9475
Size cancer kmers-junctions 51
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9475/ref_sample_peptides_meta.gz
(18393, 20)
51 Kmers - junctions not found in gtex. Recurrence is:
[  8   6 102   2]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
7438,AQVPGPGGH,64286881:64289142,64286875:64286881:64289142:64289163:None:None,8,3.357219,False,False
7439,TLGWGAWPW,64289247:64289365,64289240:64289247:64289365:64289385:None:None,6,13.428874,False,False
7440,PLTLGWGAW,64289247:64289365,64289234:64289247:64289365:64289379:None:None,6,13.428874,False,False
7441,LYFAQVPGP,64286881:64289142,64286866:64286881:64289142:64289154:None:None,8,3.357219,True,False
7442,MNLYFAQVP,64286881:64289142,64286860:64286881:64289142:64289148:None:None,8,3.357219,False,False
7443,PPHPRMGGM,64289247:64289365,64289233:64289247:64289365:64289378:None:None,6,13.428874,False,False
7444,GTSVCQAAT,64288082:64289155,64288064:64288082:64289155:64289164:None:None,102,7.553742,False,False
7445,GSGSLCRPP,64289060:64289344,64289052:64289060:64289344:64289363:None:None,2,0.839305,False,False
7446,AQVPGPGGH,64286881:64289142,64286875:64286881:64289142:64289163:None:None,8,3.357219,True,False
7447,APVPLTLGW,64289247:64289365,64289225:64289247:64289365:64289370:None:None,6,13.428874,False,False


51 Kmers - junctions not found in gtex. Junction annotated is:
[False]
51 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 179 batch 57336
Size cancer kmers-junctions 26
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_57336/ref_sample_peptides_meta.gz
(16853, 20)
26 Kmers - junctions not found in gtex. Recurrence is:
[112 143]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
7489,LFQAPASDP,131860521:131860634,131860634:131860648:131860508:131860521:None:None,112,0.839305,False,False
7490,FQAPASDPT,131860521:131860634,131860634:131860645:131860505:131860521:None:None,112,0.839305,False,False
7491,SYIKLFLAP,131860528:131860641,131860641:131860660:131860520:131860528:None:None,143,1.678609,False,False
7492,QAPASDPTQ,131860521:131860634,131860634:131860642:131860502:131860521:None:None,112,0.839305,False,False
7493,KAVPGPSLR,131860528:131860641,131860641:131860652:131860512:131860528:None:None,143,1.678609,False,False
7494,YIKLFLAPA,131860528:131860641,131860641:131860657:131860517:131860528:None:None,143,1.678609,False,False
7495,AVPGPSLRP,131860521:131860634,131860634:131860649:131860509:131860521:None:None,112,0.839305,False,False
7496,PGPSLRPHP,131860528:131860641,131860641:131860643:131860503:131860528:None:None,143,1.678609,False,False
7497,AVPGPSLRP,131860528:131860641,131860641:131860649:131860509:131860528:None:None,143,1.678609,False,False
7498,VPGPSLRPH,131860521:131860634,131860634:131860646:131860506:131860521:None:None,112,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
7489,LFQAPASDP,131860521:131860634,131860634:131860648:131860508:131860521:None:None,112,0.839305,False,False
7490,FQAPASDPT,131860521:131860634,131860634:131860645:131860505:131860521:None:None,112,0.839305,False,False
7491,SYIKLFLAP,131860528:131860641,131860641:131860660:131860520:131860528:None:None,143,1.678609,False,False
7492,QAPASDPTQ,131860521:131860634,131860634:131860642:131860502:131860521:None:None,112,0.839305,False,False
7493,KAVPGPSLR,131860528:131860641,131860641:131860652:131860512:131860528:None:None,143,1.678609,False,False
7494,YIKLFLAPA,131860528:131860641,131860641:131860657:131860517:131860528:None:None,143,1.678609,False,False
7495,AVPGPSLRP,131860521:131860634,131860634:131860649:131860509:131860521:None:None,112,0.839305,False,False
7496,PGPSLRPHP,131860528:131860641,131860641:131860643:131860503:131860528:None:None,143,1.678609,False,False
7497,AVPGPSLRP,131860528:131860641,131860641:131860649:131860509:131860528:None:None,143,1.678609,False,False
7498,VPGPSLRPH,131860521:131860634,131860634:131860646:131860506:131860521:None:None,112,0.839305,False,False


26 Kmers - junctions not found in gtex. Junction annotated is:
[False]
26 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 180 batch 21829
Size cancer kmers-junctions 191
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21829/ref_sample_peptides_meta.gz
(57608, 20)
191 Kmers - junctions not found in gtex. Recurrence is:
[26  3  4 16 67 79  7  9 17 81 20 14]
191 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
191 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 181 batch 16048
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_16048/ref_sample_peptides_meta.gz
(26916, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[5 9]
12 Kmers - junctions not found in gtex. Junction annotat

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
7764,MIVAGANYG,153786841:153787067,153787067:153787077:153786824:153786841:None:None,29,0.839305,False,False
7765,VCAVFETAT,153786811:153786868:153786800:153786449,153786868:153786878:153786800:153786811:153786...,44,0.839305,False,True
7766,FENMIVAGA,153786841:153787067,153787067:153787086:153786833:153786841:None:None,29,0.839305,False,False
7767,NVCAVFETA,153786811:153786868:153786800:153786449,153786868:153786881:153786800:153786811:153786...,44,0.839305,False,True
7768,NNVCAVFET,153786811:153786868,153786868:153786884:153786800:153786811:None:None,44,0.839305,False,False
7769,NMIVAGANY,153786841:153787067,153787067:153787080:153786827:153786841:None:None,29,0.839305,False,False
7770,PNLYGHVYA,153786824:153786893,153786893:153786908:153786812:153786824:None:None,115,0.839305,False,False
7771,VMPNLYGHV,153786824:153786893,153786893:153786914:153786818:153786824:None:None,115,0.839305,False,False
7772,VNNVCAVFE,153786811:153786868,153786868:153786887:153786803:153786811:None:None,44,0.839305,False,False
7773,CAVFETATR,153786811:153786868:153786800:153786449,153786868:153786875:153786800:153786811:153786...,44,0.839305,False,True


19 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
19 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 186 batch 34208
Size cancer kmers-junctions 11
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_34208/ref_sample_peptides_meta.gz
(56572, 20)
11 Kmers - junctions not found in gtex. Recurrence is:
[ 5 25]
11 Kmers - junctions not found in gtex. Junction annotated is:
[False]
11 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 187 batch 35609
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_35609/ref_sample_peptides_meta.gz
(37833, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[4]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found 

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,RPPGFHFQL,108,0.839305,True,False,True


216 Kmers - junctions not found in gtex. Recurrence is:
[ 33 150  42   9  30  23  15 101  84 114  52  99   2  50  94  43  47]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SPRPSPHSA,63521591:63521934,63521590:63521591:63521934:63521960:None:None,33,0.839305,False,False
1,PGLPAPLPT,63521591:63521934,63521585:63521591:63521934:63521955:None:None,33,0.839305,False,False
2,PHPPGLPAP,63521591:63521934,63521576:63521591:63521934:63521946:None:None,33,0.839305,False,False
3,PRSPRPSPH,63521591:63521934,63521584:63521591:63521934:63521954:None:None,33,0.839305,False,False
4,HSPPPRSPR,63521591:63521934,63521572:63521591:63521934:63521942:None:None,33,0.839305,False,False
...,...,...,...,...,...,...,...
211,ESAEHSPPS,63521764:63521963,63521750:63521764:63521963:63521976:None:None,47,0.839305,False,False
212,SAEHSPPSP,63521764:63521963,63521753:63521764:63521963:63521979:None:None,47,0.839305,False,False
213,AEHSPPSPP,63521764:63521963,63521756:63521764:63521963:63521982:None:None,47,0.839305,False,False
214,EHSPPSPPP,63521764:63521963,63521759:63521764:63521963:63521985:None:None,47,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SPRPSPHSA,63521591:63521934,63521590:63521591:63521934:63521960:None:None,33,0.839305,False,False
1,PGLPAPLPT,63521591:63521934,63521585:63521591:63521934:63521955:None:None,33,0.839305,False,False
2,PHPPGLPAP,63521591:63521934,63521576:63521591:63521934:63521946:None:None,33,0.839305,False,False
3,PRSPRPSPH,63521591:63521934,63521584:63521591:63521934:63521954:None:None,33,0.839305,False,False
4,HSPPPRSPR,63521591:63521934,63521572:63521591:63521934:63521942:None:None,33,0.839305,False,False
...,...,...,...,...,...,...,...
211,ESAEHSPPS,63521764:63521963,63521750:63521764:63521963:63521976:None:None,47,0.839305,False,False
212,SAEHSPPSP,63521764:63521963,63521753:63521764:63521963:63521979:None:None,47,0.839305,False,False
213,AEHSPPSPP,63521764:63521963,63521756:63521764:63521963:63521982:None:None,47,0.839305,False,False
214,EHSPPSPPP,63521764:63521963,63521759:63521764:63521963:63521985:None:None,47,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SPRPSPHSA,63521591:63521934,63521590:63521591:63521934:63521960:None:None,33,0.839305,False,False
1,PGLPAPLPT,63521591:63521934,63521585:63521591:63521934:63521955:None:None,33,0.839305,False,False
2,PHPPGLPAP,63521591:63521934,63521576:63521591:63521934:63521946:None:None,33,0.839305,False,False
3,PRSPRPSPH,63521591:63521934,63521584:63521591:63521934:63521954:None:None,33,0.839305,False,False
4,HSPPPRSPR,63521591:63521934,63521572:63521591:63521934:63521942:None:None,33,0.839305,False,False
...,...,...,...,...,...,...,...
211,ESAEHSPPS,63521764:63521963,63521750:63521764:63521963:63521976:None:None,47,0.839305,False,False
212,SAEHSPPSP,63521764:63521963,63521753:63521764:63521963:63521979:None:None,47,0.839305,False,False
213,AEHSPPSPP,63521764:63521963,63521756:63521764:63521963:63521982:None:None,47,0.839305,False,False
214,EHSPPSPPP,63521764:63521963,63521759:63521764:63521963:63521985:None:None,47,0.839305,False,False


216 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
216 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 190 batch 48474
Size cancer kmers-junctions 17
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_48474/ref_sample_peptides_meta.gz
(24794, 20)
17 Kmers - junctions not found in gtex. Recurrence is:
[2]
17 Kmers - junctions not found in gtex. Junction annotated is:
[False]
17 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 191 batch 29665
Size cancer kmers-junctions 24
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29665/ref_sample_peptides_meta.gz
(97629, 20)
24 Kmers - junctions not found in gtex. Recurrence is:
[ 4 27 34]
24 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
24 Kmers -

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
8224,KAVCGSVAC,104756667:104756721,104756659:104756667:104756721:104756740:None:None,141,1.678609,False,False
8225,VCGSVACTL,104756667:104756721,104756665:104756667:104756721:104756746:None:None,141,1.678609,True,False
8226,DGKAVCGSV,104756667:104756721,104756653:104756667:104756721:104756734:None:None,141,1.678609,False,False
8227,VVHLHVGRC,104755709:104755752,104755700:104755709:104755752:104755770:None:None,22,0.839305,False,False
8228,GCAVVHLHV,104755709:104755752,104755691:104755709:104755752:104755761:None:None,22,0.839305,False,False
8229,AVCGSVACT,104756667:104756721,104756662:104756667:104756721:104756743:None:None,141,1.678609,False,False
8230,GKAVCGSVA,104756667:104756721,104756656:104756667:104756721:104756737:None:None,141,1.678609,False,False
8231,CAVVHLHVG,104755709:104755752,104755694:104755709:104755752:104755764:None:None,22,0.839305,False,False
8232,EGCAVVHLH,104755709:104755752,104755688:104755709:104755752:104755758:None:None,22,0.839305,False,False
8233,AVCGSVACT,104756667:104756721,104756662:104756667:104756721:104756743:None:None,141,1.678609,True,False


13 Kmers - junctions not found in gtex. Junction annotated is:
[False]
13 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 202 batch 57091
Size cancer kmers-junctions 13
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_57091/ref_sample_peptides_meta.gz
(20852, 20)
13 Kmers - junctions not found in gtex. Recurrence is:
[14]
13 Kmers - junctions not found in gtex. Junction annotated is:
[False]
13 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 203 batch 35139
Size cancer kmers-junctions 344
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_35139/ref_sample_peptides_meta.gz
(29583, 20)
344 Kmers - junctions not found in gtex. Recurrence is:
[ 45  25 181  48  71  72  70 136  54  66  64  86 234  42 353 143 114  96
 163  50  13  61  19 118  34 2

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PMEGSWPDE,45470532:45479946:45479952:45480026,45470526:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
1,LSAQWRVPG,45470532:45479946:45479952:45480026,45470518:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
2,QWRVPGQMK,45470532:45479946:45479952:45480026,45470527:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
3,SLPNGGFLA,45470532:45479946:45479952:45480026,45470519:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
4,AQWRVPGQM,45470532:45479946:45479952:45480026,45470524:45470532:45479946:45479952:45480026:4...,45,0.839305,False,False
...,...,...,...,...,...,...,...
339,SDKEGSWPD,45469860:45479941:45479952:45480026,45469856:45469860:45479941:45479952:45480026:4...,53,0.839305,True,True
340,ALCPISEPP,45470525:45480081:45480094:45481376,45470517:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
341,SALCPISEP,45470525:45480081:45480094:45481376,45470514:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False
342,LCPISEPPP,45470525:45480081:45480094:45481376,45470520:45470525:45480081:45480094:45481376:4...,133,1.678609,False,False


344 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
344 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 204 batch 15519
Size cancer kmers-junctions 13
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_15519/ref_sample_peptides_meta.gz
(3915, 20)
13 Kmers - junctions not found in gtex. Recurrence is:
[9]
13 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
13 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 205 batch 18720
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_18720/ref_sample_peptides_meta.gz
(27034, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[7]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junct

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
8651,SSLRMPPTV,27241971:27242717,27241969:27241971:27242717:27242742:None:None,190,2.517914,False,False
8652,LCSSLRMPP,27241971:27242717,27241963:27241971:27242717:27242736:None:None,190,2.517914,False,False
8653,SSSFAAACA,27241971:27242717,27241955:27241971:27242717:27242728:None:None,190,2.517914,False,False
8654,LLCSSLRMP,27241971:27242717,27241960:27241971:27242717:27242733:None:None,190,2.517914,False,False
8655,PLQQPAHAT,27241971:27242717,27241962:27241971:27242717:27242735:None:None,190,2.517914,False,False
8656,SSSFAAACA,27241971:27242717,27241955:27241971:27242717:27242728:None:None,190,2.517914,True,False
8657,QQPAHATHC,27241971:27242717,27241968:27241971:27242717:27242741:None:None,190,2.517914,False,False
8658,SSFAAACAC,27241971:27242717,27241958:27241971:27242717:27242731:None:None,190,2.517914,True,False
8659,SSFAAACAC,27241971:27242717,27241958:27241971:27242717:27242731:None:None,190,2.517914,False,False
8660,CSSLRMPPT,27241971:27242717,27241966:27241971:27242717:27242739:None:None,190,2.517914,False,False


21 Kmers - junctions not found in gtex. Junction annotated is:
[False]
21 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 212 batch 58576
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_58576/ref_sample_peptides_meta.gz
(98923, 20)
14 Kmers - junctions not found in gtex. Recurrence is:
[3]
14 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
14 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 213 batch 21274
Size cancer kmers-junctions 21
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21274/ref_sample_peptides_meta.gz
(10122, 20)
21 Kmers - junctions not found in gtex. Recurrence is:
[31 13]
21 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
21 Kmers 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
8750,QLQAMHAQC,6941447:6941515,6941429:6941447:6941515:6941524:None:None,184,3.357219,False,False
8751,QLFAAPPRS,6941023:6942158,6941013:6941023:6942158:6942175:None:None,5,0.839305,False,False
8752,ARTVCRCLP,6941447:6941515,6941443:6941447:6941515:6941538:None:None,184,3.357219,False,False
8753,LQAMHAQCA,6941447:6941515,6941432:6941447:6941515:6941527:None:None,184,3.357219,False,False
8754,SLLPLPDPC,6941023:6942158,6941017:6941023:6942158:6942179:None:None,5,0.839305,False,False
8755,RRELERSVL,6937968:6938052:6938067:6938483,6937959:6937968:6938052:6938067:6938483:6938486,36,3.357219,False,False
8756,PFLQRLALE,6941406:6941462,6941400:6941406:6941462:6941483:None:None,37,0.839305,False,False
8757,AMHAQCAAA,6941447:6941515,6941438:6941447:6941515:6941533:None:None,184,3.357219,False,False
8758,KVRRELERS,6937968:6938052,6937953:6937968:6938052:6938064:None:None,36,3.357219,False,False
8759,RHQLFAAPP,6941023:6942158,6941007:6941023:6942158:6942169:None:None,5,0.839305,False,False


57 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
57 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 219 batch 27334
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27334/ref_sample_peptides_meta.gz
(17733, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[2]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
12 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 220 batch 399
Size cancer kmers-junctions 63
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_399/ref_sample_peptides_meta.gz
(88193, 20)
63 Kmers - junctions not found in gtex. Recurrence is:
[27  8 74 14 31]
63 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
63 Kmers -

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SSCPASQDA,54852283:54853566,54853566:54853587:54852277:54852283:None:None,35,1.678609,False,False
1,PAQPARTPS,54852283:54853566,54853566:54853583:54852273:54852283:None:None,35,1.678609,False,False
2,QPGRLPRGV,54852283:54853566,54853566:54853573:54852263:54852283:None:None,35,1.678609,False,False
3,ASQDAFPEV,54852283:54853566,54853566:54853575:54852265:54852283:None:None,35,1.678609,False,False
4,AQPARTPSP,54852283:54853566,54853566:54853580:54852270:54852283:None:None,35,1.678609,False,False
...,...,...,...,...,...,...,...
226,VVQDAFPEV,54852280:54854096,54854096:54854108:54852265:54852280:None:None,32,0.839305,False,False
227,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,True,True
228,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,False,True
229,LSPVHSHGW,54851794:54851905,54851905:54851907:54851769:54851794:None:None,78,30.214967,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SSCPASQDA,54852283:54853566,54853566:54853587:54852277:54852283:None:None,35,1.678609,False,False
1,PAQPARTPS,54852283:54853566,54853566:54853583:54852273:54852283:None:None,35,1.678609,False,False
2,QPGRLPRGV,54852283:54853566,54853566:54853573:54852263:54852283:None:None,35,1.678609,False,False
3,ASQDAFPEV,54852283:54853566,54853566:54853575:54852265:54852283:None:None,35,1.678609,False,False
4,AQPARTPSP,54852283:54853566,54853566:54853580:54852270:54852283:None:None,35,1.678609,False,False
...,...,...,...,...,...,...,...
226,VVQDAFPEV,54852280:54854096,54854096:54854108:54852265:54852280:None:None,32,0.839305,False,False
227,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,True,True
228,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,False,True
229,LSPVHSHGW,54851794:54851905,54851905:54851907:54851769:54851794:None:None,78,30.214967,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SSCPASQDA,54852283:54853566,54853566:54853587:54852277:54852283:None:None,35,1.678609,False,False
1,PAQPARTPS,54852283:54853566,54853566:54853583:54852273:54852283:None:None,35,1.678609,False,False
2,QPGRLPRGV,54852283:54853566,54853566:54853573:54852263:54852283:None:None,35,1.678609,False,False
3,ASQDAFPEV,54852283:54853566,54853566:54853575:54852265:54852283:None:None,35,1.678609,False,False
4,AQPARTPSP,54852283:54853566,54853566:54853580:54852270:54852283:None:None,35,1.678609,False,False
...,...,...,...,...,...,...,...
226,VVQDAFPEV,54852280:54854096,54854096:54854108:54852265:54852280:None:None,32,0.839305,False,False
227,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,True,True
228,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,False,True
229,LSPVHSHGW,54851794:54851905,54851905:54851907:54851769:54851794:None:None,78,30.214967,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SSCPASQDA,54852283:54853566,54853566:54853587:54852277:54852283:None:None,35,1.678609,False,False
1,PAQPARTPS,54852283:54853566,54853566:54853583:54852273:54852283:None:None,35,1.678609,False,False
2,QPGRLPRGV,54852283:54853566,54853566:54853573:54852263:54852283:None:None,35,1.678609,False,False
3,ASQDAFPEV,54852283:54853566,54853566:54853575:54852265:54852283:None:None,35,1.678609,False,False
4,AQPARTPSP,54852283:54853566,54853566:54853580:54852270:54852283:None:None,35,1.678609,False,False
...,...,...,...,...,...,...,...
226,VVQDAFPEV,54852280:54854096,54854096:54854108:54852265:54852280:None:None,32,0.839305,False,False
227,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,True,True
228,TKKQIVRVA,54876047:54883617:54876036:54875968,54883617:54883629:54876036:54876047:54875964:5...,27,0.839305,False,True
229,LSPVHSHGW,54851794:54851905,54851905:54851907:54851769:54851794:None:None,78,30.214967,False,False


231 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
231 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 227 batch 49542
Size cancer kmers-junctions 32
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_49542/ref_sample_peptides_meta.gz
(46400, 20)
32 Kmers - junctions not found in gtex. Recurrence is:
[94 20  2 31 16 28]
32 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
32 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 228 batch 55241
Size cancer kmers-junctions 218
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_55241/ref_sample_peptides_meta.gz
(52338, 20)
Size cancer kmers-junctions + all GTEX peptides 66


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,LRLQEGWPG,186,141.842484,False,False,False
3,QEGWPGVPQ,186,141.842484,False,False,True
6,PRRLAGRTT,186,141.842484,False,False,True
9,PSGSKKAGR,186,141.842484,False,False,False
12,ALRLQEGWP,186,141.842484,False,False,False
15,LPSGSKKAG,186,141.842484,False,False,False
18,LQEGWPGVP,186,141.842484,False,False,True
21,AALRLQEGW,186,141.842484,False,False,False
24,KKAGRAYHK,186,141.842484,False,False,True
27,RLQEGWPGV,186,141.842484,False,False,True


200 Kmers - junctions not found in gtex. Recurrence is:
[119 347  95  90 285 293 193 333 247 369 188 192  12 187 351  87 114  21
 107 309  36 155 323 173]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KPGDRGKLE,144790457:144791396,144791396:144791421:144790455:144790457:None:None,119,0.839305,False,False
1,AYHKYKAKW,144789874:144790393,144790393:144790417:144789871:144789874:None:None,347,4.196523,False,False
2,KYKAKWVSL,144789874:144790393,144790393:144790408:144789862:144789874:None:None,347,4.196523,False,False
3,TNIRQSGSH,144789874:144790393,144790393:144790410:144789864:144789874:None:None,347,4.196523,False,False
4,IRQSGSHCC,144789874:144790393,144790393:144790404:144789858:144789874:None:None,347,4.196523,False,False
...,...,...,...,...,...,...,...
195,AVVGGGWRW,144790465:144791273:144790455:144789940,144791273:144791286:144790455:144790465:144789...,155,0.839305,False,False
196,VVGGGWRWQ,144790465:144791273:144790455:144789940,144791273:144791283:144790455:144790465:144789...,155,0.839305,False,False
197,IVWWLEVAT,144790470:144791439:144790455:144789940,144791439:144791442:144790455:144790470:144789...,323,104.073775,False,False
198,QSCGWWWLE,144790465:144791273,144791273:144791290:144790455:144790465:None:None,173,0.839305,False,False


200 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
200 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 229 batch 38829
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_38829/ref_sample_peptides_meta.gz
(55067, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[42]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 230 batch 58598
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_58598/ref_sample_peptides_meta.gz
(28181, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[5]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,PDADPAGAA,168,1.678609,True,True,True
28,ADPAGAAEV,168,1.678609,True,True,True
56,PAGAAEVWA,168,1.678609,True,True,True
84,DADPAGAAE,168,1.678609,True,True,True
112,DPAGAAEVW,168,1.678609,True,True,True
140,AGAAEVWAW,168,1.678609,True,True,True


30 Kmers - junctions not found in gtex. Recurrence is:
[16  9 47]
30 Kmers - junctions not found in gtex. Junction annotated is:
[False]
30 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 234 batch 45851
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_45851/ref_sample_peptides_meta.gz
(27942, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[17]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 235 batch 29767
Size cancer kmers-junctions 105
CHECK COMPLETION OF /cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29767

 Iteration 236 batch 56707
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_gen

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,VLAPPLPWS,45902493:45902548,45902548:45902562:45902480:45902493:None:None,15,0.839305,False,False
1,PPLPWSHQT,45902493:45902548,45902548:45902553:45902471:45902493:None:None,15,0.839305,False,False
2,ELRVLAPPL,45902493:45902548,45902548:45902571:45902489:45902493:None:None,15,0.839305,False,False
3,LRVLAPPLP,45902493:45902548,45902548:45902568:45902486:45902493:None:None,15,0.839305,False,False
4,RVLAPPLPW,45902493:45902548,45902548:45902565:45902483:45902493:None:None,15,0.839305,False,False
...,...,...,...,...,...,...,...
382,SGLSPQSPS,45898940:45899040,45899040:45899050:45898923:45898940:None:None,41,0.839305,False,False
383,DSSPSGLSP,45898940:45899040,45899040:45899062:45898935:45898940:None:None,41,0.839305,False,False
384,GFLTIGAEP,45898940:45899040,45899040:45899063:45898936:45898940:None:None,41,0.839305,False,False
385,GLSPQSPSW,45898940:45899040,45899040:45899047:45898920:45898940:None:None,41,0.839305,False,False


387 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
387 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 244 batch 35998
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_35998/ref_sample_peptides_meta.gz
(38441, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[76]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 245 batch 28636
Size cancer kmers-junctions 101
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_28636/ref_sample_peptides_meta.gz
(18079, 20)
101 Kmers - junctions not found in gtex. Recurrence is:
[59 80 93  6 47  7  2]
101 Kmers - junctions not found in gtex. Junction annotated is:
[ True False

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10632,KYPDSHQPK,154273864:154275202,154273847:154273864:154275202:154275212:None:None,211,2.517914,True,False
10633,TSPNPISRA,154273864:154275202,154273863:154273864:154275202:154275228:None:None,211,2.517914,False,False
10634,LSIQIVTRL,154273862:154273926,154273845:154273862:154273926:154273936:None:None,77,0.839305,False,False
10635,SIQIVTSPN,154273864:154275202,154273848:154273864:154275202:154275213:None:None,211,2.517914,False,False
10636,KYPDSHQPK,154273864:154275202,154273847:154273864:154275202:154275212:None:None,211,2.517914,False,False
10637,PDSHQTGAP,154273862:154273926,154273853:154273862:154273926:154273944:None:None,77,0.839305,True,False
10638,IVTSPNPIS,154273864:154275202,154273857:154273864:154275202:154275222:None:None,211,2.517914,False,False
10639,DSHQPKSYF,154273864:154275202,154273856:154273864:154275202:154275221:None:None,211,2.517914,True,False
10640,HQPKSYFKS,154273864:154275202,154273862:154273864:154275202:154275227:None:None,211,2.517914,False,False
10641,YPDSHQPKS,154273864:154275202,154273850:154273864:154275202:154275215:None:None,211,2.517914,False,False


37 Kmers - junctions not found in gtex. Junction annotated is:
[False]
37 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 257 batch 21166
Size cancer kmers-junctions 30
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_21166/ref_sample_peptides_meta.gz
(326969, 20)
30 Kmers - junctions not found in gtex. Recurrence is:
[46 21]
30 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
30 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 258 batch 75
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_75/ref_sample_peptides_meta.gz
(41929, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[15]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junction

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
10744,TASWSAHQL,73769229:73769307,73769307:73769331:73769226:73769229:None:None,37,0.839305,False,False
10745,AALLTLVPP,73769291:73769647,73769647:73769671:73769288:73769291:None:None,17,17.625397,True,False
10746,CSKPPRWTG,73769255:73769501,73769501:73769506:73769233:73769255:None:None,39,23.500530,True,False
10747,PVEAPALDW,73769255:73769599,73769599:73769605:73769234:73769255:None:None,133,56.233411,False,False
10748,CCSKPPRWT,73769255:73769501,73769501:73769509:73769236:73769255:None:None,39,23.500530,False,False
...,...,...,...,...,...,...,...
10935,YNPVVPEAT,73769412:73769590,73769590:73769611:73769406:73769412:None:None,44,0.839305,True,False
10936,QGQPTPRSQ,73769278:73769464,73769464:73769468:73769255:73769278:None:None,17,34.411490,False,False
10937,KYTATKASP,73769278:73769464,73769464:73769482:73769269:73769278:None:None,17,34.411490,True,False
10938,GALLCCSKP,73769255:73769501,73769501:73769521:73769248:73769255:None:None,39,23.500530,True,False


196 Kmers - junctions not found in gtex. Junction annotated is:
[False]
196 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 263 batch 24300
Size cancer kmers-junctions 471
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_24300/ref_sample_peptides_meta.gz
(19002, 20)
471 Kmers - junctions not found in gtex. Recurrence is:
[ 64  89 113 103  63   9  67 121 137  30 193 112 131  20  59 218  22 216
 140  49  54 217 145  60   5  87  23 207 153  11 239 226  58  77 184 250
  78  44  34   4  72  48  53]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SVEAYLKKN,41524866:41524959,41524959:41524971:41524851:41524866:None:None,64,0.839305,False,False
1,EAYLKKNHE,41524866:41524959,41524959:41524965:41524845:41524866:None:None,64,0.839305,False,False
2,VEAYLKKNH,41524866:41524959,41524959:41524968:41524848:41524866:None:None,64,0.839305,True,False
3,ALRMSVEAY,41524866:41524959,41524959:41524983:41524863:41524866:None:None,64,0.839305,False,False
4,MSVEAYLKK,41524866:41524959,41524959:41524974:41524854:41524866:None:None,64,0.839305,False,False
...,...,...,...,...,...,...,...
466,MSVETDLEM,41524911:41524962,41524962:41524974:41524896:41524911:None:None,64,0.839305,False,False
467,SVETDLEMQ,41524911:41524962,41524962:41524971:41524893:41524911:None:None,64,0.839305,False,False
468,VETDLEMQI,41524911:41524962,41524962:41524968:41524890:41524911:None:None,64,0.839305,True,False
469,ERGDRPGDA,41524911:41524962,41524962:41524972:41524894:41524911:None:None,64,0.839305,False,False


471 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
471 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 264 batch 22738
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_22738/ref_sample_peptides_meta.gz
(15091, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[57]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 265 batch 158
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_158/ref_sample_peptides_meta.gz
(84400, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[5]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not fou

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,SQQAASKWT,198,0.839305,False,True,True
62,WSQQAASKW,198,0.839305,False,True,True
124,QHSRRPASG,86,0.839305,False,True,True
134,SQHSRRPAS,86,0.839305,False,True,True
144,HSRRPASGP,86,0.839305,False,True,True
154,WSQQAASKV,198,0.839305,False,False,True
194,EPAGGQQGN,198,0.839305,False,False,True
234,SQHSRRPAR,44,0.839305,False,False,True
246,QHSRRPARL,44,0.839305,False,False,True
258,HSRRPARLS,44,0.839305,False,False,True


290 Kmers - junctions not found in gtex. Recurrence is:
[125  68  74  39 154 168  44  17  11 187  47 253 177  41 171  15  59  98
  69 237 175  99   6  70  37 119  58 103  76 114  80 100  19  28]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ASGSSDLQS,62306292:62307764,62306280:62306292:62307764:62307779:None:None,125,0.839305,False,False
1,SGSSDLQSI,62306292:62307764,62306283:62306292:62307764:62307782:None:None,125,0.839305,False,False
2,GSSDLQSIL,62306292:62307764,62306286:62306292:62307764:62307785:None:None,125,0.839305,False,False
3,ALGASGSSD,62306292:62307764,62306271:62306292:62307764:62307770:None:None,125,0.839305,False,False
4,SSDLQSILA,62306292:62307764,62306289:62306292:62307764:62307788:None:None,125,0.839305,False,False
...,...,...,...,...,...,...,...
285,ALGALTGPG,62306273:62307374,62306271:62306273:62307374:62307399:None:None,28,0.839305,False,False
286,PMPGALGAL,62306273:62307374,62306259:62306273:62307374:62307387:None:None,28,0.839305,False,False
287,MPGALGALT,62306273:62307374,62306262:62306273:62307374:62307390:None:None,28,0.839305,False,False
288,PGALGALTG,62306273:62307374,62306265:62306273:62307374:62307393:None:None,28,0.839305,False,False


290 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
290 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 268 batch 10016
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_10016/ref_sample_peptides_meta.gz
(70803, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[10 21]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 269 batch 30754
Size cancer kmers-junctions 5
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_30754/ref_sample_peptides_meta.gz
(53953, 20)
5 Kmers - junctions not found in gtex. Recurrence is:
[46]
5 Kmers - junctions not found in gtex. Junction annotated is:
[False]
5 Kmers - junctions not fou

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ETQYEEMAK,52238773:52241574,52238769:52238773:52241574:52241597:None:None,31,0.839305,True,False
1,TLNETQYEE,52238773:52241574,52238760:52238773:52241574:52241588:None:None,31,0.839305,False,False
2,LNETQYEEM,52238773:52241574,52238763:52238773:52241574:52241591:None:None,31,0.839305,True,False
3,RTLNETQYE,52238773:52241574,52238757:52238773:52241574:52241585:None:None,31,0.839305,False,False
4,NETQYEEMA,52238773:52241574,52238766:52238773:52241574:52241594:None:None,31,0.839305,False,False
...,...,...,...,...,...,...,...
75,IAEIDNIKN,52241567:52243116,52241558:52241567:52243116:52243134:None:None,244,1.678609,False,False
76,SSLRSTTSR,52241567:52243116,52241556:52241567:52243116:52243132:None:None,244,1.678609,False,False
77,DGIIAEIDN,52241567:52243116,52241549:52241567:52243116:52243125:None:None,244,1.678609,False,False
78,LDGIIAEID,52241567:52243116,52241546:52241567:52243116:52243122:None:None,244,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ETQYEEMAK,52238773:52241574,52238769:52238773:52241574:52241597:None:None,31,0.839305,True,False
1,TLNETQYEE,52238773:52241574,52238760:52238773:52241574:52241588:None:None,31,0.839305,False,False
2,LNETQYEEM,52238773:52241574,52238763:52238773:52241574:52241591:None:None,31,0.839305,True,False
3,RTLNETQYE,52238773:52241574,52238757:52238773:52241574:52241585:None:None,31,0.839305,False,False
4,NETQYEEMA,52238773:52241574,52238766:52238773:52241574:52241594:None:None,31,0.839305,False,False
...,...,...,...,...,...,...,...
75,IAEIDNIKN,52241567:52243116,52241558:52241567:52243116:52243134:None:None,244,1.678609,False,False
76,SSLRSTTSR,52241567:52243116,52241556:52241567:52243116:52243132:None:None,244,1.678609,False,False
77,DGIIAEIDN,52241567:52243116,52241549:52241567:52243116:52243125:None:None,244,1.678609,False,False
78,LDGIIAEID,52241567:52243116,52241546:52241567:52243116:52243122:None:None,244,1.678609,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ETQYEEMAK,52238773:52241574,52238769:52238773:52241574:52241597:None:None,31,0.839305,True,False
1,TLNETQYEE,52238773:52241574,52238760:52238773:52241574:52241588:None:None,31,0.839305,False,False
2,LNETQYEEM,52238773:52241574,52238763:52238773:52241574:52241591:None:None,31,0.839305,True,False
3,RTLNETQYE,52238773:52241574,52238757:52238773:52241574:52241585:None:None,31,0.839305,False,False
4,NETQYEEMA,52238773:52241574,52238766:52238773:52241574:52241594:None:None,31,0.839305,False,False
...,...,...,...,...,...,...,...
75,IAEIDNIKN,52241567:52243116,52241558:52241567:52243116:52243134:None:None,244,1.678609,False,False
76,SSLRSTTSR,52241567:52243116,52241556:52241567:52243116:52243132:None:None,244,1.678609,False,False
77,DGIIAEIDN,52241567:52243116,52241549:52241567:52243116:52243125:None:None,244,1.678609,False,False
78,LDGIIAEID,52241567:52243116,52241546:52241567:52243116:52243122:None:None,244,1.678609,False,False


80 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
80 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 273 batch 28990
Size cancer kmers-junctions 376
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_28990/ref_sample_peptides_meta.gz
(16445, 20)
376 Kmers - junctions not found in gtex. Recurrence is:
[ 20  37  24  10  15  27  12  18  41   9  19  11  39  53  30 116  13  34
  17  60  14  35  26 110]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GPLVRAKLE,44908902:44909064,44908893:44908902:44909064:44909082:None:None,20,88.966291,False,False
1,PLVRAKLEE,44908902:44909064,44908896:44908902:44909064:44909085:None:None,20,88.966291,False,False
2,PWCAPSWRS,44908902:44909064,44908897:44908902:44909064:44909086:None:None,20,88.966291,False,False
3,SASAWGPWC,44908902:44909064,44908879:44908902:44909064:44909068:None:None,20,88.966291,False,False
4,LVRAKLEEQ,44908902:44909064,44908899:44908902:44909064:44909088:None:None,20,88.966291,False,False
...,...,...,...,...,...,...,...
371,EETRPQRHP,44908615:44908863,44908608:44908615:44908863:44908883:None:None,26,0.839305,False,False
372,PVAEETRPQ,44908615:44908863,44908599:44908615:44908863:44908874:None:None,26,0.839305,False,False
373,AEETRPQRH,44908615:44908863,44908605:44908615:44908863:44908880:None:None,26,0.839305,False,False
374,TPVAEETRP,44908615:44908863,44908596:44908615:44908863:44908871:None:None,26,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,GPLVRAKLE,44908902:44909064,44908893:44908902:44909064:44909082:None:None,20,88.966291,False,False
1,PLVRAKLEE,44908902:44909064,44908896:44908902:44909064:44909085:None:None,20,88.966291,False,False
2,PWCAPSWRS,44908902:44909064,44908897:44908902:44909064:44909086:None:None,20,88.966291,False,False
3,SASAWGPWC,44908902:44909064,44908879:44908902:44909064:44909068:None:None,20,88.966291,False,False
4,LVRAKLEEQ,44908902:44909064,44908899:44908902:44909064:44909088:None:None,20,88.966291,False,False
...,...,...,...,...,...,...,...
371,EETRPQRHP,44908615:44908863,44908608:44908615:44908863:44908883:None:None,26,0.839305,False,False
372,PVAEETRPQ,44908615:44908863,44908599:44908615:44908863:44908874:None:None,26,0.839305,False,False
373,AEETRPQRH,44908615:44908863,44908605:44908615:44908863:44908880:None:None,26,0.839305,False,False
374,TPVAEETRP,44908615:44908863,44908596:44908615:44908863:44908871:None:None,26,0.839305,False,False


376 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
376 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 274 batch 25471
Size cancer kmers-junctions 11
CHECK COMPLETION OF /cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_25471

 Iteration 275 batch 5099
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_5099/ref_sample_peptides_meta.gz
(8875, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[7]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 276 batch 35100
Size cancer kmers-junctions 4
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mu

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EDQRSGVPP,41319852:41320340:41320352:41323867,41319848:41319852:41320340:41320352:41323867:4...,116,1.678609,True,True
1,DQRSGVPPS,41319852:41320340:41320352:41323867,41319851:41319852:41320340:41320352:41323867:4...,116,1.678609,False,True
2,EDQRSGVPP,41319852:41320340:41320352:41323867,41319848:41319852:41320340:41320352:41323867:4...,116,1.678609,False,True
3,DQRSGVPPS,41319852:41320340:41320352:41323867,41319851:41319852:41320340:41320352:41323867:4...,116,1.678609,True,True
4,EEDQRSGVP,41319852:41320340:41320352:41323867,41319845:41319852:41320340:41320352:41323867:4...,116,1.678609,False,True
5,EEDQRSGVP,41319852:41320340:41320352:41323867,41319845:41319852:41320340:41320352:41323867:4...,116,1.678609,True,True
6,DEEDQRSGV,41319852:41320340:41320352:41323867,41319842:41319852:41320340:41320352:41323867:4...,116,1.678609,True,True
7,DEEDQRSGV,41319852:41320340:41320352:41323867,41319842:41319852:41320340:41320352:41323867:4...,116,1.678609,False,True
8,RSGVPPSHA,41320352:41323867,41320345:41320352:41323867:41323887:None:None,116,1.678609,True,False
9,SGVPPSHAP,41320352:41323867,41320348:41320352:41323867:41323890:None:None,116,1.678609,True,False


14 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
14 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 279 batch 16998
Size cancer kmers-junctions 10
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_16998/ref_sample_peptides_meta.gz
(51229, 20)
10 Kmers - junctions not found in gtex. Recurrence is:
[14]
10 Kmers - junctions not found in gtex. Junction annotated is:
[False]
10 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 280 batch 28804
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_28804/ref_sample_peptides_meta.gz
(7847, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[4]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions no

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,EWDFPSVGA,62629208:62629257:62629193:62627353,62629257:62629266:62629193:62629208:62627350:6...,19,0.839305,False,False
1,NEPCSRMPS,62629883:62629911,62629911:62629921:62629866:62629883:None:None,2,1.678609,False,False
2,VWNDMNEPC,62629883:62629911,62629911:62629936:62629881:62629883:None:None,2,1.678609,False,False
3,DMNEPCSRM,62629883:62629911,62629911:62629927:62629872:62629883:None:None,2,1.678609,False,False
4,MNEPCSRMP,62629883:62629911,62629911:62629924:62629869:62629883:None:None,2,1.678609,False,False
5,NDMNEPCSR,62629883:62629911,62629911:62629930:62629875:62629883:None:None,2,1.678609,False,False
6,WNDMNEPCS,62629883:62629911,62629911:62629933:62629878:62629883:None:None,2,1.678609,False,False
7,EPCSRMPSI,62629883:62629911,62629911:62629918:62629863:62629883:None:None,2,1.678609,False,False
8,QYLLGYSSR,62626447:62627288,62627288:62627301:62626433:62626447:None:None,2,3.357219,False,False
9,LLGYSSRRA,62626447:62627288,62627288:62627295:62626427:62626447:None:None,2,3.357219,False,False


57 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
57 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 287 batch 10042
Size cancer kmers-junctions 19
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_10042/ref_sample_peptides_meta.gz
(31533, 20)
19 Kmers - junctions not found in gtex. Recurrence is:
[65 20 13]
19 Kmers - junctions not found in gtex. Junction annotated is:
[False]
19 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 288 batch 49874
Size cancer kmers-junctions 34
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_49874/ref_sample_peptides_meta.gz
(23208, 20)
34 Kmers - junctions not found in gtex. Recurrence is:
[ 6 10  4]
34 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
34 Kmers - 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,NHNCCGPLS,88898922:88899429,88899429:88899433:88898899:88898922:None:None,5,0.839305,False,False
1,AWHNHNCCG,88898922:88899429,88899429:88899442:88898908:88898922:None:None,5,0.839305,False,False
2,ILPVAWHNH,88898922:88899429,88899429:88899454:88898920:88898922:None:None,5,0.839305,False,False
3,VAWHNHNCC,88898922:88899429,88899429:88899445:88898911:88898922:None:None,5,0.839305,False,False
4,WHNHNCCGP,88898922:88899429,88899429:88899439:88898905:88898922:None:None,5,0.839305,False,False
5,HNHNCCGPL,88898922:88899429,88899429:88899436:88898902:88898922:None:None,5,0.839305,False,False
6,PVAWHNHNC,88898922:88899429,88899429:88899448:88898914:88898922:None:None,5,0.839305,False,False
7,LPVAWHNHN,88898922:88899429,88899429:88899451:88898917:88898922:None:None,5,0.839305,False,False
8,TLRFELLEP,88901634:88905771,88905771:88905793:88901629:88901634:None:None,28,0.839305,False,False
9,CTLRFELLE,88901634:88905771,88905771:88905796:88901632:88901634:None:None,28,0.839305,False,False


40 Kmers - junctions not found in gtex. Junction annotated is:
[False]
40 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 292 batch 3183
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_3183/ref_sample_peptides_meta.gz
(35355, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[1 2]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 293 batch 22988
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_22988/ref_sample_peptides_meta.gz
(229241, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[7]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF a

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
12797,KTPTPEVGG,27440911:27441278,27440898:27440911:27441278:27441292:None:None,111,0.839305,False,False
12798,TPEVGGQTE,27440911:27441278,27440907:27440911:27441278:27441301:None:None,111,0.839305,False,False
12799,SVKTPTPEV,27440911:27441278,27440892:27440911:27441278:27441286:None:None,111,0.839305,False,False
12800,PTPEVGGQT,27440911:27441278,27440904:27440911:27441278:27441298:None:None,111,0.839305,False,False
12801,TPTPEVGGQ,27440911:27441278,27440901:27440911:27441278:27441295:None:None,111,0.839305,False,False
12802,VKTPTPEVG,27440911:27441278,27440895:27440911:27441278:27441289:None:None,111,0.839305,False,False
12803,PEVGGQTEP,27440911:27441278,27440910:27440911:27441278:27441304:None:None,111,0.839305,False,False


7 Kmers - junctions not found in gtex. Junction annotated is:
[False]
7 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 305 batch 9630
Size cancer kmers-junctions 91
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9630/ref_sample_peptides_meta.gz
(46933, 20)
91 Kmers - junctions not found in gtex. Recurrence is:
[80  3 10  5 13 79]
91 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
91 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 306 batch 9595
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9595/ref_sample_peptides_meta.gz
(28249, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[1]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,ATWVVLAYE,6869132:6869710,6869123:6869132:6869710:6869728:None:None,130,18.464702,False,False
1,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,False,False
2,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,True,False
3,GATWVVLAY,6869132:6869710,6869120:6869132:6869710:6869725:None:None,130,18.464702,False,False
4,TWVVLAYEP,6869132:6869710,6869126:6869132:6869710:6869731:None:None,130,18.464702,True,False
...,...,...,...,...,...,...,...
303,RQRSSQYTR,6869390:6870058,6869372:6869390:6870058:6870067:None:None,109,54.554801,False,False
304,FEQTKVIAV,6869390:6870058,6869365:6869390:6870058:6870060:None:None,109,54.554801,False,False
305,SRQRSSQYT,6869390:6870058,6869369:6869390:6870058:6870064:None:None,109,54.554801,False,False
306,RADKGHRST,6869390:6870058,6869367:6869390:6870058:6870062:None:None,109,54.554801,False,False


308 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
308 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 311 batch 38433
Size cancer kmers-junctions 14
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_38433/ref_sample_peptides_meta.gz
(74126, 20)
Size cancer kmers-junctions + all GTEX peptides 72


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,QIEKEEQPE,215,1.678609,False,False,False
36,TQIEKEEQP,215,1.678609,False,False,False


12 Kmers - junctions not found in gtex. Recurrence is:
[  4 157  20]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,LDVLQMKEK,39407683:39407772,39407665:39407683:39407772:39407781:None:None,4,0.839305,True,False
1,AAPEFTAAP,39411916:39411970:39411983:39412309,39411913:39411916:39411970:39411983:39412309:3...,157,1.678609,False,False
2,SRLLLSSLQ,39411916:39411970:39411983:39412309,39411908:39411916:39411970:39411983:39412309:3...,157,1.678609,False,False
3,QAAPEFTAA,39411916:39411970:39411983:39412309,39411910:39411916:39411970:39411983:39412309:3...,157,1.678609,False,False
4,RLLLSSLQL,39411916:39411970:39411983:39412309,39411911:39411916:39411970:39411983:39412309:3...,157,1.678609,False,False
5,EWTGLQLPL,39411960:39412302,39411955:39411960:39412302:39412324:None:None,20,0.839305,False,False
6,GEWTGLQLP,39411960:39412302,39411952:39411960:39412302:39412321:None:None,20,0.839305,False,False
7,QGEWTGLQL,39411960:39412302,39411949:39411960:39412302:39412318:None:None,20,0.839305,False,False
8,FQGEWTGLQ,39411960:39412302,39411946:39411960:39412302:39412315:None:None,20,0.839305,False,False
9,EEFQGEWTG,39411960:39412302,39411940:39411960:39412302:39412309:None:None,20,0.839305,False,False


12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 312 batch 29196
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_29196/ref_sample_peptides_meta.gz
(17133, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[32  6]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 313 batch 7811
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_7811/ref_sample_peptides_meta.gz
(28489, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[21]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found i

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,STGNPSTSL,40367764:40367803,40367744:40367764:40367803:40367810:None:None,30,0.839305,False,False
1,GNPSTSLDI,40367764:40367803,40367750:40367764:40367803:40367816:None:None,30,0.839305,False,False
2,TGNPSTSLD,40367764:40367803,40367747:40367764:40367803:40367813:None:None,30,0.839305,False,False
3,TSLDIASFY,40367764:40367803,40367762:40367764:40367803:40367828:None:None,30,0.839305,True,False
4,STSLDIASF,40367764:40367803,40367759:40367764:40367803:40367825:None:None,30,0.839305,True,False
...,...,...,...,...,...,...,...
460,VPADPGALG,40377799:40378209,40377797:40377799:40378209:40378234:None:None,254,1.678609,False,False
461,VPRGPRMAS,40377799:40378208:40378223:40380376,40377797:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
462,VVPRGPRMA,40377799:40378208:40378223:40380376,40377794:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False
463,FVVPRGPRM,40377799:40378208:40378223:40380376,40377791:40377799:40378208:40378223:40380376:4...,104,0.839305,False,False


465 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
465 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 321 batch 47444
Size cancer kmers-junctions 100
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_47444/ref_sample_peptides_meta.gz
(90916, 20)
100 Kmers - junctions not found in gtex. Recurrence is:
[10  4 30 23 12 34  5 13 50 14  2 22]
100 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
100 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 322 batch 8830
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_8830/ref_sample_peptides_meta.gz
(23957, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[18]
12 Kmers - junctions not found in gtex. Junct

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,MGPKAKKSG,141,1.678609,True,True,True



 Iteration 326 batch 27435
Size cancer kmers-junctions 68
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_27435/ref_sample_peptides_meta.gz
(7642, 20)
68 Kmers - junctions not found in gtex. Recurrence is:
[38  7  8 64 69  6]
68 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
68 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 327 batch 13658
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_13658/ref_sample_peptides_meta.gz
(39876, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[12]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 328 batch 434
Size cancer kmers-junctions 43
/cluster/work/grlab/project

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PPQVPEADQ,1373902:1374059,1374059:1374081:1373897:1373902:None:None,32,20.982616,False,False
1,HKYRKLIKF,1373902:1374059,1374059:1374077:1373893:1373902:None:None,32,20.982616,False,False
2,LIKFEKDLR,1373902:1374059,1374059:1374062:1373878:1373902:None:None,32,20.982616,False,False
3,RKLIKFEKD,1373902:1374059,1374059:1374068:1373884:1373902:None:None,32,20.982616,False,False
4,PEADQVRER,1373902:1374059,1374059:1374069:1373885:1373902:None:None,32,20.982616,False,False
...,...,...,...,...,...,...,...
91,IGEGTPETQ,1374018:1374164,1374164:1374176:1374003:1374018:None:None,12,0.839305,False,False
92,QIGEGTPET,1374018:1374164,1374164:1374179:1374006:1374018:None:None,12,0.839305,False,False
93,GEGTPETQA,1374018:1374164,1374164:1374173:1374000:1374018:None:None,12,0.839305,False,False
94,PPSQIGEGT,1374018:1374164,1374164:1374188:1374015:1374018:None:None,12,0.839305,False,False


96 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
96 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 334 batch 22994
Size cancer kmers-junctions 165
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_22994/ref_sample_peptides_meta.gz
(41348, 20)
Size cancer kmers-junctions + all GTEX peptides 30


Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,KRNDFQIGI,269,0.839305,False,False,False
6,TSKGMTSRL,269,0.839305,False,False,False
12,IKRNDFQIG,269,0.839305,False,False,False
18,SKGMTSRLA,269,0.839305,False,False,False
24,KGMTSRLAS,269,0.839305,False,False,False


160 Kmers - junctions not found in gtex. Recurrence is:
[ 51  75  53  23  65  61  90  19 190  13 102  72  46  10  95   3  26 164
  47]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,RRPPPRAWP,7311607:7311862,7311602:7311607:7311862:7311884:None:None,51,0.839305,False,False
1,GGGSSDPLL,7311844:7312041,7311827:7311844:7312041:7312051:None:None,75,0.839305,False,False
2,VILFSPHTT,7311844:7312041,7311840:7311844:7312041:7312064:None:None,75,0.839305,False,False
3,SSDPLLPSH,7311844:7312041,7311836:7311844:7312041:7312060:None:None,75,0.839305,False,False
4,SDPLLPSHY,7311844:7312041,7311839:7311844:7312041:7312063:None:None,75,0.839305,False,False
...,...,...,...,...,...,...,...
155,RQLIIWMPP,7311092:7311862,7311072:7311092:7311862:7311869:None:None,47,24.339834,False,False
156,LIIWMPPPR,7311092:7311862,7311078:7311092:7311862:7311875:None:None,47,24.339834,False,False
157,QLIIWMPPP,7311092:7311862,7311075:7311092:7311862:7311872:None:None,47,24.339834,False,False
158,PSTHNMDAP,7311092:7311862,7311071:7311092:7311862:7311868:None:None,47,24.339834,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,RRPPPRAWP,7311607:7311862,7311602:7311607:7311862:7311884:None:None,51,0.839305,False,False
1,GGGSSDPLL,7311844:7312041,7311827:7311844:7312041:7312051:None:None,75,0.839305,False,False
2,VILFSPHTT,7311844:7312041,7311840:7311844:7312041:7312064:None:None,75,0.839305,False,False
3,SSDPLLPSH,7311844:7312041,7311836:7311844:7312041:7312060:None:None,75,0.839305,False,False
4,SDPLLPSHY,7311844:7312041,7311839:7311844:7312041:7312063:None:None,75,0.839305,False,False
...,...,...,...,...,...,...,...
155,RQLIIWMPP,7311092:7311862,7311072:7311092:7311862:7311869:None:None,47,24.339834,False,False
156,LIIWMPPPR,7311092:7311862,7311078:7311092:7311862:7311875:None:None,47,24.339834,False,False
157,QLIIWMPPP,7311092:7311862,7311075:7311092:7311862:7311872:None:None,47,24.339834,False,False
158,PSTHNMDAP,7311092:7311862,7311071:7311092:7311862:7311868:None:None,47,24.339834,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,RRPPPRAWP,7311607:7311862,7311602:7311607:7311862:7311884:None:None,51,0.839305,False,False
1,GGGSSDPLL,7311844:7312041,7311827:7311844:7312041:7312051:None:None,75,0.839305,False,False
2,VILFSPHTT,7311844:7312041,7311840:7311844:7312041:7312064:None:None,75,0.839305,False,False
3,SSDPLLPSH,7311844:7312041,7311836:7311844:7312041:7312060:None:None,75,0.839305,False,False
4,SDPLLPSHY,7311844:7312041,7311839:7311844:7312041:7312063:None:None,75,0.839305,False,False
...,...,...,...,...,...,...,...
155,RQLIIWMPP,7311092:7311862,7311072:7311092:7311862:7311869:None:None,47,24.339834,False,False
156,LIIWMPPPR,7311092:7311862,7311078:7311092:7311862:7311875:None:None,47,24.339834,False,False
157,QLIIWMPPP,7311092:7311862,7311075:7311092:7311862:7311872:None:None,47,24.339834,False,False
158,PSTHNMDAP,7311092:7311862,7311071:7311092:7311862:7311868:None:None,47,24.339834,False,False


160 Kmers - junctions not found in gtex. Junction annotated is:
[False]
160 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 335 batch 38826
Size cancer kmers-junctions 15
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_38826/ref_sample_peptides_meta.gz
(21877, 20)
15 Kmers - junctions not found in gtex. Recurrence is:
[5]
15 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
15 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 336 batch 52767
Size cancer kmers-junctions 21
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_52767/ref_sample_peptides_meta.gz
(132982, 20)
21 Kmers - junctions not found in gtex. Recurrence is:
[ 7 17 28]
21 Kmers - junctions not found in gtex. Junction annotated is:
[False]
21 Kmers - junc

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PDKPITQCL,20653652:20654614,20654614:20654634:20653645:20653652:None:None,293,0.839305,False,False
1,KPITQCLHC,20653652:20654614,20654614:20654628:20653639:20653652:None:None,293,0.839305,False,False
2,FPDKPITQC,20653652:20654614,20654614:20654637:20653648:20653652:None:None,293,0.839305,False,False
3,DKPITQCLH,20653652:20654614,20654614:20654631:20653642:20653652:None:None,293,0.839305,False,False
4,PITQCLHCH,20653652:20654614,20654614:20654625:20653636:20653652:None:None,293,0.839305,False,False
...,...,...,...,...,...,...,...
60,RLPEVFPDS,20653774:20654222:20653760:20653661,20654222:20654234:20653760:20653774:20653660:2...,189,0.839305,False,True
61,AEGVLRVGP,20653713:20654359,20654359:20654362:20653689:20653713:None:None,258,0.839305,False,False
62,HAEGVLRVG,20653713:20654359,20654359:20654365:20653692:20653713:None:None,258,0.839305,False,False
63,YPHAEGVLR,20653713:20654359,20654359:20654371:20653698:20653713:None:None,258,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PDKPITQCL,20653652:20654614,20654614:20654634:20653645:20653652:None:None,293,0.839305,False,False
1,KPITQCLHC,20653652:20654614,20654614:20654628:20653639:20653652:None:None,293,0.839305,False,False
2,FPDKPITQC,20653652:20654614,20654614:20654637:20653648:20653652:None:None,293,0.839305,False,False
3,DKPITQCLH,20653652:20654614,20654614:20654631:20653642:20653652:None:None,293,0.839305,False,False
4,PITQCLHCH,20653652:20654614,20654614:20654625:20653636:20653652:None:None,293,0.839305,False,False
...,...,...,...,...,...,...,...
60,RLPEVFPDS,20653774:20654222:20653760:20653661,20654222:20654234:20653760:20653774:20653660:2...,189,0.839305,False,True
61,AEGVLRVGP,20653713:20654359,20654359:20654362:20653689:20653713:None:None,258,0.839305,False,False
62,HAEGVLRVG,20653713:20654359,20654359:20654365:20653692:20653713:None:None,258,0.839305,False,False
63,YPHAEGVLR,20653713:20654359,20654359:20654371:20653698:20653713:None:None,258,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,PDKPITQCL,20653652:20654614,20654614:20654634:20653645:20653652:None:None,293,0.839305,False,False
1,KPITQCLHC,20653652:20654614,20654614:20654628:20653639:20653652:None:None,293,0.839305,False,False
2,FPDKPITQC,20653652:20654614,20654614:20654637:20653648:20653652:None:None,293,0.839305,False,False
3,DKPITQCLH,20653652:20654614,20654614:20654631:20653642:20653652:None:None,293,0.839305,False,False
4,PITQCLHCH,20653652:20654614,20654614:20654625:20653636:20653652:None:None,293,0.839305,False,False
...,...,...,...,...,...,...,...
60,RLPEVFPDS,20653774:20654222:20653760:20653661,20654222:20654234:20653760:20653774:20653660:2...,189,0.839305,False,True
61,AEGVLRVGP,20653713:20654359,20654359:20654362:20653689:20653713:None:None,258,0.839305,False,False
62,HAEGVLRVG,20653713:20654359,20654359:20654365:20653692:20653713:None:None,258,0.839305,False,False
63,YPHAEGVLR,20653713:20654359,20654359:20654371:20653698:20653713:None:None,258,0.839305,False,False


65 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
65 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 343 batch 56554
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_56554/ref_sample_peptides_meta.gz
(21421, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[7]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 344 batch 9612
Size cancer kmers-junctions 35
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_9612/ref_sample_peptides_meta.gz
(31496, 20)
35 Kmers - junctions not found in gtex. Recurrence is:
[  6  16  50 119   5]


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KLLVHLKEE,66059856:66060581,66059835:66059856:66060581:66060587:None:None,6,0.839305,False,False
1,LVHLKEEQK,66059856:66060581,66059841:66059856:66060581:66060593:None:None,6,0.839305,False,False
2,VHLKEEQKT,66059856:66060581,66059844:66059856:66060581:66060596:None:None,6,0.839305,False,False
3,HLKEEQKTM,66059856:66060581,66059847:66059856:66060581:66060599:None:None,6,0.839305,False,False
4,PKLLVHLKE,66059856:66060581,66059832:66059856:66060581:66060584:None:None,6,0.839305,False,False
5,LKEEQKTMK,66059856:66060581,66059850:66059856:66060581:66060602:None:None,6,0.839305,False,False
6,LLVHLKEEQ,66059856:66060581,66059838:66059856:66060581:66060590:None:None,6,0.839305,False,False
7,SKKEKKPQR,66058405:66058862,66058393:66058405:66058862:66058877:None:None,16,17.625397,False,False
8,KKEKKPQRV,66058405:66058862,66058396:66058405:66058862:66058880:None:None,16,17.625397,False,False
9,KEKKPQRVR,66058405:66058862,66058399:66058405:66058862:66058883:None:None,16,17.625397,False,False


35 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
35 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 345 batch 7282
Size cancer kmers-junctions 32
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_7282/ref_sample_peptides_meta.gz
(16368, 20)
32 Kmers - junctions not found in gtex. Recurrence is:
[ 3 14  8]
32 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
32 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 346 batch 40820
Size cancer kmers-junctions 11
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_40820/ref_sample_peptides_meta.gz
(39158, 20)
11 Kmers - junctions not found in gtex. Recurrence is:
[67 18]
11 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
11 Kmers -

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,HKELVLKSA,128620557:128622232,128622232:128622238:128620536:128620557:None:None,225,2.517914,False,False
1,WRLNTRSWC,128620557:128622232,128622232:128622249:128620547:128620557:None:None,225,2.517914,False,False
2,RAWRLNTRS,128620557:128622232,128622232:128622255:128620553:128620557:None:None,225,2.517914,False,False
3,EHKELVLKS,128620557:128622232,128622232:128622241:128620539:128620557:None:None,225,2.517914,False,False
4,TEHKELVLK,128620557:128622232,128622232:128622244:128620542:128620557:None:None,225,2.517914,False,False
...,...,...,...,...,...,...,...
101,GGGWSPRST,128620446:128620526,128620526:128620537:128620430:128620446:None:None,23,0.839305,False,False
102,GGGGWSPRS,128620446:128620526,128620526:128620540:128620433:128620446:None:None,23,0.839305,False,False
103,VEAGHQDRP,128620446:128620526,128620526:128620536:128620429:128620446:None:None,23,0.839305,False,False
104,GGWSPRSTT,128620446:128620526,128620526:128620534:128620427:128620446:None:None,23,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,HKELVLKSA,128620557:128622232,128622232:128622238:128620536:128620557:None:None,225,2.517914,False,False
1,WRLNTRSWC,128620557:128622232,128622232:128622249:128620547:128620557:None:None,225,2.517914,False,False
2,RAWRLNTRS,128620557:128622232,128622232:128622255:128620553:128620557:None:None,225,2.517914,False,False
3,EHKELVLKS,128620557:128622232,128622232:128622241:128620539:128620557:None:None,225,2.517914,False,False
4,TEHKELVLK,128620557:128622232,128622232:128622244:128620542:128620557:None:None,225,2.517914,False,False
...,...,...,...,...,...,...,...
101,GGGWSPRST,128620446:128620526,128620526:128620537:128620430:128620446:None:None,23,0.839305,False,False
102,GGGGWSPRS,128620446:128620526,128620526:128620540:128620433:128620446:None:None,23,0.839305,False,False
103,VEAGHQDRP,128620446:128620526,128620526:128620536:128620429:128620446:None:None,23,0.839305,False,False
104,GGWSPRSTT,128620446:128620526,128620526:128620534:128620427:128620446:None:None,23,0.839305,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,HKELVLKSA,128620557:128622232,128622232:128622238:128620536:128620557:None:None,225,2.517914,False,False
1,WRLNTRSWC,128620557:128622232,128622232:128622249:128620547:128620557:None:None,225,2.517914,False,False
2,RAWRLNTRS,128620557:128622232,128622232:128622255:128620553:128620557:None:None,225,2.517914,False,False
3,EHKELVLKS,128620557:128622232,128622232:128622241:128620539:128620557:None:None,225,2.517914,False,False
4,TEHKELVLK,128620557:128622232,128622232:128622244:128620542:128620557:None:None,225,2.517914,False,False
...,...,...,...,...,...,...,...
101,GGGWSPRST,128620446:128620526,128620526:128620537:128620430:128620446:None:None,23,0.839305,False,False
102,GGGGWSPRS,128620446:128620526,128620526:128620540:128620433:128620446:None:None,23,0.839305,False,False
103,VEAGHQDRP,128620446:128620526,128620526:128620536:128620429:128620446:None:None,23,0.839305,False,False
104,GGWSPRSTT,128620446:128620526,128620526:128620534:128620427:128620446:None:None,23,0.839305,False,False


106 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
106 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 353 batch 887
Size cancer kmers-junctions 22
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_887/ref_sample_peptides_meta.gz
(1489, 20)
22 Kmers - junctions not found in gtex. Recurrence is:
[16 26 21 19]
22 Kmers - junctions not found in gtex. Junction annotated is:
[False]
22 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 354 batch 38450
Size cancer kmers-junctions 22
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_38450/ref_sample_peptides_meta.gz
(534462, 20)
22 Kmers - junctions not found in gtex. Recurrence is:
[20 64 29]
22 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
22 Kmers -

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SARLTSLTK,100709188:100709561,100709561:100709586:100709186:100709188:None:None,3,0.839305,False,False
1,LTSLTKLLN,100709188:100709561,100709561:100709577:100709177:100709188:None:None,3,0.839305,False,False
2,RLTSLTKLL,100709188:100709561,100709561:100709580:100709180:100709188:None:None,3,0.839305,False,False
3,ARLTSLTKL,100709188:100709561,100709561:100709583:100709183:100709188:None:None,3,0.839305,False,False
4,KAVDEMNVS,100712789:100713086:100712770:100712407,100713086:100713089:100712770:100712789:100712...,71,2.517914,False,True
5,QKAVDEMNV,100712789:100713086:100712770:100712407,100713086:100713092:100712770:100712789:100712...,71,2.517914,False,True
6,AVDEMNVSG,100712407:100712770,100712770:100712789:100712399:100712407:None:None,71,2.517914,False,False
7,DEMNVSGKS,100712407:100712770,100712770:100712783:100712393:100712407:None:None,71,2.517914,False,False
8,EMNVSGKSF,100712407:100712770,100712770:100712780:100712390:100712407:None:None,71,2.517914,False,False
9,VDEMNVSGK,100712407:100712770,100712770:100712786:100712396:100712407:None:None,71,2.517914,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SARLTSLTK,100709188:100709561,100709561:100709586:100709186:100709188:None:None,3,0.839305,False,False
1,LTSLTKLLN,100709188:100709561,100709561:100709577:100709177:100709188:None:None,3,0.839305,False,False
2,RLTSLTKLL,100709188:100709561,100709561:100709580:100709180:100709188:None:None,3,0.839305,False,False
3,ARLTSLTKL,100709188:100709561,100709561:100709583:100709183:100709188:None:None,3,0.839305,False,False
4,KAVDEMNVS,100712789:100713086:100712770:100712407,100713086:100713089:100712770:100712789:100712...,71,2.517914,False,True
5,QKAVDEMNV,100712789:100713086:100712770:100712407,100713086:100713092:100712770:100712789:100712...,71,2.517914,False,True
6,AVDEMNVSG,100712407:100712770,100712770:100712789:100712399:100712407:None:None,71,2.517914,False,False
7,DEMNVSGKS,100712407:100712770,100712770:100712783:100712393:100712407:None:None,71,2.517914,False,False
8,EMNVSGKSF,100712407:100712770,100712770:100712780:100712390:100712407:None:None,71,2.517914,False,False
9,VDEMNVSGK,100712407:100712770,100712770:100712786:100712396:100712407:None:None,71,2.517914,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,SARLTSLTK,100709188:100709561,100709561:100709586:100709186:100709188:None:None,3,0.839305,False,False
1,LTSLTKLLN,100709188:100709561,100709561:100709577:100709177:100709188:None:None,3,0.839305,False,False
2,RLTSLTKLL,100709188:100709561,100709561:100709580:100709180:100709188:None:None,3,0.839305,False,False
3,ARLTSLTKL,100709188:100709561,100709561:100709583:100709183:100709188:None:None,3,0.839305,False,False
4,KAVDEMNVS,100712789:100713086:100712770:100712407,100713086:100713089:100712770:100712789:100712...,71,2.517914,False,True
5,QKAVDEMNV,100712789:100713086:100712770:100712407,100713086:100713092:100712770:100712789:100712...,71,2.517914,False,True
6,AVDEMNVSG,100712407:100712770,100712770:100712789:100712399:100712407:None:None,71,2.517914,False,False
7,DEMNVSGKS,100712407:100712770,100712770:100712783:100712393:100712407:None:None,71,2.517914,False,False
8,EMNVSGKSF,100712407:100712770,100712770:100712780:100712390:100712407:None:None,71,2.517914,False,False
9,VDEMNVSGK,100712407:100712770,100712770:100712786:100712396:100712407:None:None,71,2.517914,False,False


43 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
43 Kmers - junctions not found in gtex. RF annotated is:
[False  True]

 Iteration 359 batch 46370
Size cancer kmers-junctions 8
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_46370/ref_sample_peptides_meta.gz
(116515, 20)
8 Kmers - junctions not found in gtex. Recurrence is:
[8]
8 Kmers - junctions not found in gtex. Junction annotated is:
[False]
8 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 360 batch 36363
Size cancer kmers-junctions 33
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_36363/ref_sample_peptides_meta.gz
(21545, 20)
33 Kmers - junctions not found in gtex. Recurrence is:
[ 6 39 17]
33 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
33 Kmers - junc

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,MLLNRFGNK,92832117:92836189:92836203:92836329,92832114:92832117:92836189:92836203:92836329:9...,81,0.839305,True,False
1,GFSIGLAIK,92834913:92836189:92836203:92836329,92834911:92834913:92836189:92836203:92836329:9...,160,0.839305,False,True
2,RLLNRFGNK,92834913:92836189:92836203:92836329,92834910:92834913:92836189:92836203:92836329:9...,160,0.839305,False,True
3,AGFSIGLAI,92834913:92836189:92836203:92836329,92834908:92834913:92836189:92836203:92836329:9...,160,0.839305,False,True
4,RRLLNRFGN,92834913:92836189:92836203:92836329,92834907:92834913:92836189:92836203:92836329:9...,160,0.839305,False,True
5,GLAIKFLVP,92836203:92836329,92836199:92836203:92836329:92836352:None:None,160,0.839305,False,False
6,SIGLAIKFL,92836203:92836329,92836193:92836203:92836329:92836346:None:None,160,0.839305,False,False
7,IGLAIKFLV,92836203:92836329,92836196:92836203:92836329:92836349:None:None,160,0.839305,False,False
8,FGNKVFGAL,92836203:92836329,92836201:92836203:92836329:92836354:None:None,160,0.839305,True,False
9,FGNKVFGAL,92836203:92836329,92836201:92836203:92836329:92836354:None:None,160,0.839305,False,False


19 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
19 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 366 batch 3348
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_3348/ref_sample_peptides_meta.gz
(56256, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[79 70]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 367 batch 7301
Size cancer kmers-junctions 43
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_7301/ref_sample_peptides_meta.gz
(96573, 20)
43 Kmers - junctions not found in gtex. Recurrence is:
[ 8 10 57 17 78]
43 Kmers - junctions not found in gtex. Junction annotated is:
[False]
43 Kmers - j

(47244, 20)
124 Kmers - junctions not found in gtex. Recurrence is:
[44  5 10 21 32 47 19  8 51  9]
124 Kmers - junctions not found in gtex. Junction annotated is:
[False]
124 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 386 batch 22445
Size cancer kmers-junctions 7
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_22445/ref_sample_peptides_meta.gz
(55619, 20)
7 Kmers - junctions not found in gtex. Recurrence is:
[3]
7 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
7 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 387 batch 5467
Size cancer kmers-junctions 40
CHECK COMPLETION OF /cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_5467

 Iteration 388 batch 942
Size cancer kmers-junctions 14
/cluster/work/grlab/pr

Unnamed: 0,kmer,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated,gtexE2<cancE2
0,QAADQKTKG,177,0.839305,False,False,False


1 Kmers - junctions not found in gtex. Recurrence is:
[8]
1 Kmers - junctions not found in gtex. Junction annotated is:
[False]
1 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 390 batch 45765
Size cancer kmers-junctions 12
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_45765/ref_sample_peptides_meta.gz
(13165, 20)
12 Kmers - junctions not found in gtex. Recurrence is:
[19]
12 Kmers - junctions not found in gtex. Junction annotated is:
[False]
12 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 391 batch 20274
Size cancer kmers-junctions 20
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_20274/ref_sample_peptides_meta.gz
(32837, 20)
20 Kmers - junctions not found in gtex. Recurrence is:
[ 7 86]
20 Kmers - junctions not found in gtex. Junctio

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16462,FAVQAFKDC,73873207:73873258,73873258:73873264:73873186:73873207:None:None,168,76.376722,True,False
16463,HFAVQAFKD,73873207:73873258,73873258:73873267:73873189:73873207:None:None,168,76.376722,True,False
16464,AASHFAVQA,73873207:73873258,73873258:73873276:73873198:73873207:None:None,168,76.376722,True,False
16465,AVQAFKDCM,73873207:73873258,73873258:73873261:73873183:73873207:None:None,168,76.376722,True,False
16466,SHFAVQAFK,73873207:73873258,73873258:73873270:73873192:73873207:None:None,168,76.376722,True,False
16467,ASHFAVQAF,73873207:73873258,73873258:73873273:73873195:73873207:None:None,168,76.376722,True,False


6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 394 batch 35111
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_35111/ref_sample_peptides_meta.gz
(141222, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[28]
6 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
6 Kmers - junctions not found in gtex. RF annotated is:
[ True]

 Iteration 395 batch 47584
Size cancer kmers-junctions 59
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_47584/ref_sample_peptides_meta.gz
(5606, 20)
59 Kmers - junctions not found in gtex. Recurrence is:
[ 6 10 26 53 85 11]
59 Kmers - junctions not found in gtex. Junction annotated is:
[False]
59 Kmers - junctions

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
0,KEETRPAPE,29793537:29793659,29793659:29793669:29793520:29793537:None:None,1,3.357219,False,False
1,EETRPAPEP,29793537:29793659,29793659:29793666:29793517:29793537:None:None,1,3.357219,False,False
2,TRKEETRPA,29793537:29793659,29793659:29793675:29793526:29793537:None:None,1,3.357219,False,False
3,RKEETRPAP,29793537:29793659,29793659:29793672:29793523:29793537:None:None,1,3.357219,False,False
4,VADRKRSKG,29789047:29789152,29789152:29789159:29789027:29789047:None:None,123,0.839305,False,False
5,STAVADRKR,29789047:29789152,29789152:29789168:29789036:29789047:None:None,123,0.839305,False,False
6,HDSSTAVAD,29789047:29789152,29789152:29789177:29789045:29789047:None:None,123,0.839305,False,False
7,SSTAVADRK,29789047:29789152,29789152:29789171:29789039:29789047:None:None,123,0.839305,False,False
8,TAVADRKRS,29789047:29789152,29789152:29789165:29789033:29789047:None:None,123,0.839305,False,False
9,AVADRKRSK,29789047:29789152,29789152:29789162:29789030:29789047:None:None,123,0.839305,False,False


14 Kmers - junctions not found in gtex. Junction annotated is:
[False  True]
14 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 405 batch 55188
Size cancer kmers-junctions 142
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_55188/ref_sample_peptides_meta.gz
(31686, 20)
142 Kmers - junctions not found in gtex. Recurrence is:
[76 38 18 39 14 75  9 46  5 43 21 34]
142 Kmers - junctions not found in gtex. Junction annotated is:
[ True False]
142 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 406 batch 39856
Size cancer kmers-junctions 234
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_39856/ref_sample_peptides_meta.gz
(1349, 20)
234 Kmers - junctions not found in gtex. Recurrence is:
[148  87 123  82  33 166  65 192  17  24 125  80  77  26  67 

Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16796,LPAPMAEAA,129314988:129315872,129315872:129315878:129314967:129314988:None:None,148,4.196523,True,False
16797,KAARPSPAS,129315296:129315341,129315341:129315358:129315286:129315296:None:None,87,0.839305,False,False
16798,LAKIYTEAA,129314973:129315683,129315683:129315704:129314967:129314973:None:None,123,1.678609,True,False
16799,PRTKRRRAL,129315380:129315464,129315464:129315484:129315373:129315380:None:None,82,1.678609,False,False
16800,RKKLEGAGG,129315328:129315532,129315532:129315551:129315320:129315328:None:None,33,0.839305,True,False
...,...,...,...,...,...,...,...
17025,KIYTEAKKA,129315464:129315674,129315674:129315698:129315461:129315464:None:None,65,59.590629,True,False
17026,KKAAPAHRA,129314973:129315464,129315464:129315470:129314952:129314973:None:None,67,0.839305,False,False
17027,SIKARGDAE,129314928:129315620,129315620:129315629:129314910:129314928:None:None,24,0.839305,False,False
17028,AGGKKAAHP,129314910:129315314,129315314:129315329:129314898:129314910:None:None,17,10.071656,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16796,LPAPMAEAA,129314988:129315872,129315872:129315878:129314967:129314988:None:None,148,4.196523,True,False
16797,KAARPSPAS,129315296:129315341,129315341:129315358:129315286:129315296:None:None,87,0.839305,False,False
16798,LAKIYTEAA,129314973:129315683,129315683:129315704:129314967:129314973:None:None,123,1.678609,True,False
16799,PRTKRRRAL,129315380:129315464,129315464:129315484:129315373:129315380:None:None,82,1.678609,False,False
16800,RKKLEGAGG,129315328:129315532,129315532:129315551:129315320:129315328:None:None,33,0.839305,True,False
...,...,...,...,...,...,...,...
17025,KIYTEAKKA,129315464:129315674,129315674:129315698:129315461:129315464:None:None,65,59.590629,True,False
17026,KKAAPAHRA,129314973:129315464,129315464:129315470:129314952:129314973:None:None,67,0.839305,False,False
17027,SIKARGDAE,129314928:129315620,129315620:129315629:129314910:129314928:None:None,24,0.839305,False,False
17028,AGGKKAAHP,129314910:129315314,129315314:129315329:129314898:129314910:None:None,17,10.071656,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16796,LPAPMAEAA,129314988:129315872,129315872:129315878:129314967:129314988:None:None,148,4.196523,True,False
16797,KAARPSPAS,129315296:129315341,129315341:129315358:129315286:129315296:None:None,87,0.839305,False,False
16798,LAKIYTEAA,129314973:129315683,129315683:129315704:129314967:129314973:None:None,123,1.678609,True,False
16799,PRTKRRRAL,129315380:129315464,129315464:129315484:129315373:129315380:None:None,82,1.678609,False,False
16800,RKKLEGAGG,129315328:129315532,129315532:129315551:129315320:129315328:None:None,33,0.839305,True,False
...,...,...,...,...,...,...,...
17025,KIYTEAKKA,129315464:129315674,129315674:129315698:129315461:129315464:None:None,65,59.590629,True,False
17026,KKAAPAHRA,129314973:129315464,129315464:129315470:129314952:129314973:None:None,67,0.839305,False,False
17027,SIKARGDAE,129314928:129315620,129315620:129315629:129314910:129314928:None:None,24,0.839305,False,False
17028,AGGKKAAHP,129314910:129315314,129315314:129315329:129314898:129314910:None:None,17,10.071656,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16796,LPAPMAEAA,129314988:129315872,129315872:129315878:129314967:129314988:None:None,148,4.196523,True,False
16797,KAARPSPAS,129315296:129315341,129315341:129315358:129315286:129315296:None:None,87,0.839305,False,False
16798,LAKIYTEAA,129314973:129315683,129315683:129315704:129314967:129314973:None:None,123,1.678609,True,False
16799,PRTKRRRAL,129315380:129315464,129315464:129315484:129315373:129315380:None:None,82,1.678609,False,False
16800,RKKLEGAGG,129315328:129315532,129315532:129315551:129315320:129315328:None:None,33,0.839305,True,False
...,...,...,...,...,...,...,...
17025,KIYTEAKKA,129315464:129315674,129315674:129315698:129315461:129315464:None:None,65,59.590629,True,False
17026,KKAAPAHRA,129314973:129315464,129315464:129315470:129314952:129314973:None:None,67,0.839305,False,False
17027,SIKARGDAE,129314928:129315620,129315620:129315629:129314910:129314928:None:None,24,0.839305,False,False
17028,AGGKKAAHP,129314910:129315314,129315314:129315329:129314898:129314910:None:None,17,10.071656,False,False


Unnamed: 0,kmer,junction_coordinate,coord,cancerCohortfilter >0.0,TCGA25131901A01R156513all,readFrameAnnotated,junctionAnnotated
16796,LPAPMAEAA,129314988:129315872,129315872:129315878:129314967:129314988:None:None,148,4.196523,True,False
16797,KAARPSPAS,129315296:129315341,129315341:129315358:129315286:129315296:None:None,87,0.839305,False,False
16798,LAKIYTEAA,129314973:129315683,129315683:129315704:129314967:129314973:None:None,123,1.678609,True,False
16799,PRTKRRRAL,129315380:129315464,129315464:129315484:129315373:129315380:None:None,82,1.678609,False,False
16800,RKKLEGAGG,129315328:129315532,129315532:129315551:129315320:129315328:None:None,33,0.839305,True,False
...,...,...,...,...,...,...,...
17025,KIYTEAKKA,129315464:129315674,129315674:129315698:129315461:129315464:None:None,65,59.590629,True,False
17026,KKAAPAHRA,129314973:129315464,129315464:129315470:129314952:129314973:None:None,67,0.839305,False,False
17027,SIKARGDAE,129314928:129315620,129315620:129315629:129314910:129314928:None:None,24,0.839305,False,False
17028,AGGKKAAHP,129314910:129315314,129315314:129315329:129314898:129314910:None:None,17,10.071656,False,False


234 Kmers - junctions not found in gtex. Junction annotated is:
[False]
234 Kmers - junctions not found in gtex. RF annotated is:
[ True False]

 Iteration 407 batch 32088
Size cancer kmers-junctions 2
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_32088/ref_sample_peptides_meta.gz
(283275, 20)
2 Kmers - junctions not found in gtex. Recurrence is:
[3]
2 Kmers - junctions not found in gtex. Junction annotated is:
[False]
2 Kmers - junctions not found in gtex. RF annotated is:
[False]

 Iteration 408 batch 41780
Size cancer kmers-junctions 6
/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_c4dd02c_conf2_RFall_ref/cohort_mutNone/tmp_out_ref_batch_41780/ref_sample_peptides_meta.gz
(19633, 20)
6 Kmers - junctions not found in gtex. Recurrence is:
[3]
6 Kmers - junctions not found in gtex. Junction annotated is:
[False]
6 Kmers - junctions not found in gt

EmptyDataError: No columns to parse from file

In [18]:
print(filt_meta_pb.shape)
print(filt_meta_pb[['kmer', 'junction_coordinate']].drop_duplicates().shape)

(131962, 26)
(119786, 2)


In [19]:
df_kmers_no_metadata = pd.concat(kmers_no_metadata, axis = 0 )
print(f'{df_kmers_no_metadata.shape[0]} kmers without metadata')
print(f'{df_kmers_no_metadata.drop_duplicates().shape[0]} UNIQUE kmers without metadata')

506 kmers without metadata
434 UNIQUE kmers without metadata


In [20]:
df_kmers_not_in_gtex = pd.concat(kmers_not_in_gtex, axis = 0 )
print(f'{df_kmers_not_in_gtex.shape[0]} kmers completely absent in GTEX')
print(f'{df_kmers_not_in_gtex.drop_duplicates().shape[0]} UNIQUE kmers completely absent in GTEX')

16724 kmers completely absent in GTEX
14990 UNIQUE kmers completely absent in GTEX


In [21]:
df_check_hypothesis = pd.concat(check_hypothesis, axis = 0 )
print(f'{df_check_hypothesis.shape[0]} kmers coming from novel exons')
print(f'{df_check_hypothesis.drop_duplicates().shape[0]} UNIQUE kmers coming from novel exons')

126 kmers coming from novel exons
125 UNIQUE kmers coming from novel exons


In [22]:
# Create new labels based on the false positive check above 
# The sets no_metadata, not_GTEX, check_hypothesis are disjoint
print(set(df_kmers_no_metadata['kmer']).intersection( set(df_kmers_not_in_gtex['kmer'])))
print(set(df_kmers_no_metadata['kmer']).intersection( set(df_check_hypothesis['kmer'])))
print(set(df_kmers_not_in_gtex['kmer']).intersection( set(df_check_hypothesis['kmer'])))

# Drop duplicates
df_kmers_no_metadata = df_kmers_no_metadata.drop_duplicates() 
df_kmers_not_in_gtex = df_kmers_not_in_gtex.drop_duplicates()
df_check_hypothesis = df_check_hypothesis.drop_duplicates()

df_no_info = df_kmers_no_metadata.copy()
df_no_info['info_not_available'] = True

# Augment df with information about GTEX junction presence
df_kmers_not_in_gtex['coordinate_in_GTEX'] = False
df_kmers_no_metadata['coordinate_in_GTEX'] = False # No info
df_coord_in_gtex = pd.concat([df_kmers_not_in_gtex, df_kmers_no_metadata], axis = 0)

# Augment df with information about Novel exon hypothesis
df_kmers_no_metadata.drop('coordinate_in_GTEX', inplace=True, axis=1)
df_kmers_no_metadata['gtexExon2<cancExon2'] = False
df_check_hypothesis['gtexExon2<cancExon2'] = True
df_exon_length = pd.concat([df_check_hypothesis, df_kmers_no_metadata], axis = 0)

filt_meta = filt_meta.merge(df_coord_in_gtex, on = ['kmer', 'junction_coordinate'], how = 'left')
filt_meta['coordinate_in_GTEX'] = filt_meta['coordinate_in_GTEX'].fillna(True)

filt_meta = filt_meta.merge(df_exon_length, on = ['kmer', 'junction_coordinate'], how = 'left')
filt_meta['gtexExon2<cancExon2'] = filt_meta['gtexExon2<cancExon2'].fillna(False)

filt_meta = filt_meta.merge(df_no_info, on = ['kmer', 'junction_coordinate'], how = 'left')
filt_meta['info_not_available'] = filt_meta['info_not_available'].fillna(False)

display(filt_meta.groupby('gtexExon2<cancExon2').count())
display(filt_meta.groupby('coordinate_in_GTEX').count())

set()
set()
{'GKHEERKYT', 'SQQAASKWT', 'SQQAASKAV'}


Unnamed: 0_level_0,kmer,gtexCohortfilter >0.0,gtexCohortfilter >=1.0,gtexCohortfilter >=2.0,gtexCohortfilter >=3.0,gtexCohortfilter >=5.0,gtexCohortfilter >=10.0,coord,junctionAnnotated,readFrameAnnotated,...,TCGA25131901A01R156513all,TCGA25131301A01R156513all,TCGA61200801A02R156813all,TCGA24143101A01R156613all,TCGA24229801A01R156913all,isAnnotated,strand,junction_coordinate,coordinate_in_GTEX,info_not_available
gtexExon2<cancExon2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,131835,676,676,676,676,676,676,131835,131835,131835,...,131835,131835,131835,131835,131835,21,131835,131835,131835,131835
True,127,7,7,7,7,7,7,127,127,127,...,127,127,127,127,127,1,127,127,127,127


Unnamed: 0_level_0,kmer,gtexCohortfilter >0.0,gtexCohortfilter >=1.0,gtexCohortfilter >=2.0,gtexCohortfilter >=3.0,gtexCohortfilter >=5.0,gtexCohortfilter >=10.0,coord,junctionAnnotated,readFrameAnnotated,...,TCGA25131901A01R156513all,TCGA25131301A01R156513all,TCGA61200801A02R156813all,TCGA24143101A01R156613all,TCGA24229801A01R156913all,isAnnotated,strand,junction_coordinate,gtexExon2<cancExon2,info_not_available
coordinate_in_GTEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,17230,14,14,14,14,14,14,17230,17230,17230,...,17230,17230,17230,17230,17230,0,17230,17230,17230,17230
True,114732,669,669,669,669,669,669,114732,114732,114732,...,114732,114732,114732,114732,114732,22,114732,114732,114732,114732


In [23]:
path_interest = f'G_{sample_target}_SampleLim0.0CohortLimNoneAcrossNone_FiltNormalsGtexCohortCohortlim0.0Across1.tsv.gz'
path_interest_with_advanced_meta = os.path.join(output_dir, path_interest.replace('.tsv.gz', 'metadata2.tsv.gz'))
print(f'Saving kmers with advanced metadata to {path_interest_with_advanced_meta}')
filt_meta.to_csv(path_interest_with_advanced_meta, sep = '\t', compression = 'gzip', index = None)

Saving kmers with advanced metadata to /cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/CANCER_eth/commit_c4dd02c_conf2_Frame_cap0_runs/TCGA_Ovarian_374/filtering_samples/filters_22March_order_wany_wAnnot/G_TCGA-25-1319-01A-01R-1565-13_SampleLim0.0CohortLimNoneAcrossNone_FiltNormalsGtexCohortCohortlim0.0Across1metadata2.tsv.gz


## Exploratory

In [None]:
pd.read

### Remove GTEX annotation 

In [19]:
# path_annot = glob.glob('/cluster/work/grlab/projects/projects2020_OHSU/peptides_generation/GTEX2019_eth/GTEX2019_6920432_ANNOT_conf2_RFall_ref/cohort_mutNone/*/ref_annot_kmer.gz')

# print(len(path_annot))

In [20]:
# for idx, annot in enumerate(path_annot):
#     kmers_filter_pipeline = len(kmer_post_filter)
#     annot = pd.read_csv(annot, sep = '\t')
#     annot = set(annot['kmer'])
#     kmer_post_filter = kmer_post_filter.difference(annot)
#     if kmers_filter_pipeline != len(kmer_post_filter):
#         print(path_annot[idx], len(kmer_post_filter))