## RNA-seq Pipeline for Detecting Exitrons (Exonic Introns)

### 0. Imports

In [1]:
import pandas as pd
import glob
import os
from tqdm import tqdm # progress tracker
import pyranges as pr # parsing gff
import numpy as np
import pysam

### 1. Parse Regtools Data 
- Parse raw junction data from regtools output files

- Processes 1 file at a time

In [2]:
def parseJunctionFile(file_path):
    """Load one RegTools junction file into a cleaned DataFrame.

    The file is read as headerless, tab-separated text using the twelve
    column names listed below. A 'sample_id_source' column is added, the
    coordinate/score columns are coerced to numbers, incomplete rows are
    dropped, and the numeric columns are finally cast to int.

    Args:
        file_path: Path to the junction file. The part of its base name
            before the first '.' is used as the sample ID.

    Returns:
        A pandas DataFrame holding the twelve input columns plus
        'sample_id_source'. Rows whose 'start_anchor', 'end_anchor' or
        'score' could not be parsed as a number, or whose
        'block_sizes_orig' is missing, are not included.
    """
    numeric_cols = ['start_anchor', 'end_anchor', 'score']
    text_cols = ['chrom', 'block_sizes_orig', 'block_starts_orig']

    # column layout of a RegTools junction file
    column_names = [
        'chrom', 'start_anchor', 'end_anchor', 'name', 'score', 'strand',
        'thick_start_orig', 'thick_end_orig', 'item_rgb_orig',
        'block_count_orig', 'block_sizes_orig', 'block_starts_orig'
    ]

    # sample ID = file name up to (not including) the first dot
    sample_id, _, _ = os.path.basename(file_path).partition('.')

    df = pd.read_csv(
        file_path,
        sep='\t',
        header=None,
        names=column_names,
        dtype={col: str for col in text_cols},
    )
    df['sample_id_source'] = sample_id

    # unparseable values become NaN so the dropna below removes those rows
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df.dropna(subset=numeric_cols + ['block_sizes_orig'], inplace=True)

    # no NaN is left in the numeric columns, so the int cast is safe
    return df.astype({col: int for col in numeric_cols})

### 2. Transform Junction Data
- Recalculates junction coordinates, following Regtools documentation to take into account blockSize

- Recalculates block size to represent length of junction

- Outputs junction info in BED12 format

In [3]:
def transformJunctionData(raw_df):
    """Turn parsed RegTools rows into single-block BED12-style junction rows.

    Steps, in order:
      1. Keep only rows on chromosomes 1-22, X and Y (with or without a
         'chr' prefix; the sex chromosomes are accepted in either case).
      2. Keep only rows whose 'block_sizes_orig' lists at least two
         comma-separated values; the first two are used as the left and
         right overhang.
      3. Junction start = start_anchor + left overhang,
         junction end   = end_anchor - right overhang.
         Rows where start is not strictly below end are dropped.

    Args:
        raw_df: DataFrame with at least the columns 'chrom',
            'start_anchor', 'end_anchor', 'name', 'score', 'strand',
            'item_rgb_orig', 'block_sizes_orig' and 'sample_id_source'.

    Returns:
        A new DataFrame with BED12-named columns ('chrom', 'chromStart',
        ..., 'blockStarts') plus 'sample_id_source'. Every row has
        blockCount 1, blockStarts "0" and blockSizes equal to the junction
        length as a string. The surviving rows keep their index labels
        from raw_df.
    """
    # CHROMOSOME FILTERING
    autosomes = [str(number) for number in range(1, 23)]
    bare_names = autosomes + ['X', 'x', 'Y', 'y']
    allowed_chromosomes = set(bare_names) | {f"chr{name}" for name in bare_names}

    on_standard_chrom = raw_df['chrom'].isin(allowed_chromosomes)
    kept = raw_df[on_standard_chrom].copy()
    print(f"Removed {len(raw_df) - len(kept)} rows with non-standard chromosomes.")

    # JUNCTION COORD CORRECTION
    # a trailing comma is stripped first so it does not yield an empty block
    block_sizes = kept['block_sizes_orig'].str.strip(',').str.split(',')
    has_two_blocks = block_sizes.str.len() >= 2
    kept = kept[has_two_blocks].copy()
    block_sizes = block_sizes[has_two_blocks]

    # the first two block sizes are the left / right overhangs
    overhang_left = block_sizes.str[0].astype(int)
    overhang_right = block_sizes.str[1].astype(int)

    junc_start = kept['start_anchor'] + overhang_left
    junc_end = kept['end_anchor'] - overhang_right

    # discard junctions with zero or negative length
    is_valid = junc_start < junc_end
    kept = kept[is_valid]
    junc_start = junc_start[is_valid]
    junc_end = junc_end[is_valid]

    # assemble the output; all Series share the index of `kept`
    transformed_df = pd.DataFrame({
        'chrom': kept['chrom'],
        'chromStart': junc_start,
        'chromEnd': junc_end,
        'name': kept['name'],
        'score': kept['score'],
        'strand': kept['strand'],
        'thickStart': junc_start,
        'thickEnd': junc_end,
        'itemRgb': kept['item_rgb_orig'],
        'blockCount': 1,
        'blockSizes': (junc_end - junc_start).astype(str),
        'blockStarts': "0",
        'sample_id_source': kept['sample_id_source'],
    })

    print(f"Transformed {len(transformed_df)} junction records.")

    return transformed_df

### 3. Find Exitrons Within Junction Data
- Converts transformed junction data (transformed_df) into a PyRanges object with columns Chromosome, Start, End, Strand, title (a unique junction ID formed as chrom:start:end:strand), reads, and sourceID; the CDS features (from the GFF3 file) are loaded as a PyRanges object as well

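- Finds junctions that are contained within a CDS region on the same strand, using the PyRanges method `.overlap` (with `contained_intervals_only=True` and `strand_behavior='same'`)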

In [4]:
# Load the GENCODE v48 annotation (path is relative to the notebook's working
# directory) and keep only the rows whose Feature column equals "CDS".
# `cds` is a module-level global: findExitrons() reads it directly, so this
# cell must be run before any call to findExitrons()/compileExitronData().
gff = pr.read_gff3("gencode.v48.annotation.gff3.gz")
cds = gff[gff.Feature == "CDS"]
# sanity check on how many CDS rows were kept
print(f"Found {len(cds)} CDS regions.")

Found 903356 CDS regions.


In [5]:
def findExitrons(transformed_df):
    """Select the junctions reported as contained within CDS regions.

    Junctions whose strand is neither '+' nor '-' are discarded first.
    The remaining rows are wrapped in a PyRanges object and matched against
    the module-level `cds` PyRanges (which must already be defined) with
    `contained_intervals_only=True` and `strand_behavior='same'`.

    Args:
        transformed_df: DataFrame with the columns 'chrom', 'chromStart',
            'chromEnd', 'strand', 'score' and 'sample_id_source'.

    Returns:
        The PyRanges object returned by `overlap`, carrying the columns
        Chromosome, Start, End, Strand, title (chrom:start:end:strand),
        reads (the junction score) and sourceID (the sample ID).
    """
    stranded = transformed_df[transformed_df['strand'].isin(['+', '-'])]

    # unique ID for each junction (chrom:start:end:strand)
    junction_ids = (
        stranded['chrom'].astype(str) + ':'
        + stranded['chromStart'].astype(str) + ':'
        + stranded['chromEnd'].astype(str) + ':'
        + stranded['strand'].astype(str)
    )

    # column layout for the junction PyRanges object
    junction_columns = {
        'Chromosome': stranded['chrom'],
        'Start': stranded['chromStart'],
        'End': stranded['chromEnd'],
        'Strand': stranded['strand'],
        'title': junction_ids,
        'reads': stranded['score'],
        'sourceID': stranded['sample_id_source'],
    }
    junction_pr = pr.PyRanges(junction_columns)

    # keep only junctions matched against same-strand CDS intervals
    contained_junctions = junction_pr.overlap(
        cds, contained_intervals_only=True, strand_behavior='same'
    )
    print(f"Found {len(contained_junctions)} junctions contained within CDS regions.")

    return contained_junctions

### 4. Compile All Exitron Info
- Iterates through each person's file, finding all exitron data then concatenating to a final matrix

- Includes person ID (file name) and junction scores (total reads)

In [6]:
def compileExitronData(directory_path, output_filepath, file_pattern="*.bam.junc"):
    """Run the exitron pipeline on every junction file and save the result.

    Each file matching `file_pattern` inside `directory_path` is passed
    through parseJunctionFile -> transformJunctionData -> findExitrons.
    The per-file results are concatenated with `pr.concat` and written to
    `output_filepath` as a parquet file.

    A file that raises during processing is reported (message plus
    traceback) and skipped; the remaining files are still processed.

    Args:
        directory_path: Directory that holds the junction files.
        output_filepath: Destination path of the parquet file.
        file_pattern: Glob pattern selecting the junction files.

    Returns:
        The concatenated PyRanges object for all successfully processed
        files.

    Raises:
        ValueError: If no file matches the pattern, or if no file could be
            processed successfully (there is nothing to concatenate).
    """
    import traceback  # only needed for error reporting in this function

    # glob returns matches in arbitrary order; sorting makes the row order
    # of the compiled output reproducible between runs
    file_paths = sorted(glob.glob(os.path.join(directory_path, file_pattern)))
    print(f"Found {len(file_paths)} files to process.")

    # fail early with a clear message instead of an obscure concat error
    if not file_paths:
        raise ValueError(
            f"No files matching '{file_pattern}' found in '{directory_path}'."
        )

    all_exitron_info = []
    failed_files = []

    for file_path in tqdm(file_paths):
        print("Parsing new file...")
        file_name_only = os.path.basename(file_path)
        try:
            # 1. parse -> 2. transform -> 3. find exitrons -> 4. collect
            parsed_data = parseJunctionFile(file_path)
            transformed_df = transformJunctionData(parsed_data)
            gr_file = findExitrons(transformed_df)
            all_exitron_info.append(gr_file)
        except Exception as e:
            # deliberate best-effort: one bad file must not abort the run
            print(f"An error occurred while processing file {file_name_only}: {e}")
            traceback.print_exc()
            failed_files.append(file_name_only)

    if not all_exitron_info:
        raise ValueError(
            f"None of the {len(file_paths)} files could be processed; "
            "nothing to concatenate."
        )

    # concatenate all individual data into matrix
    final_gr = pr.concat(all_exitron_info)
    print(f"\nSuccessfully compiled exitron data from {len(all_exitron_info)} files.")
    if failed_files:
        print(f"Skipped {len(failed_files)} files due to errors: {', '.join(failed_files)}")

    # NOTE(review): `.df` is used when the PyRanges object provides it; if the
    # installed pyranges release has no such attribute (PyRanges being a
    # DataFrame itself), the object is written directly. Confirm against the
    # installed pyranges version.
    frame = getattr(final_gr, "df", final_gr)
    frame.to_parquet(output_filepath, index=False)
    print(f"Successfully saved data to {output_filepath}")
    return final_gr

In [None]:
# Run the full pipeline over every junction file in the cluster directory and
# write the compiled result to a parquet file. Both paths are absolute and
# specific to the original compute environment; adjust them before re-running.
# file_pattern is passed explicitly but equals the function's default.
data = compileExitronData("/gpfs/commons/groups/knowles_lab/atokolyi/als/juncs_min6bp/", "/gpfs/commons/home/ncui/project/exitron_data.parquet", file_pattern="*.bam.junc")

Found 1876 files to process.


  0%|          | 0/1876 [00:00<?, ?it/s]

Parsing new file...
Removed 898 rows with non-standard chromosomes.
Transformed 245111 junction records.


  0%|          | 1/1876 [00:01<59:30,  1.90s/it]

Found 7873 junctions contained within CDS regions.
Parsing new file...
Removed 1401 rows with non-standard chromosomes.
Transformed 281682 junction records.


  0%|          | 2/1876 [00:03<1:01:57,  1.98s/it]

Found 10733 junctions contained within CDS regions.
Parsing new file...
Removed 2779 rows with non-standard chromosomes.
Transformed 311550 junction records.


  0%|          | 3/1876 [00:06<1:06:30,  2.13s/it]

Found 13517 junctions contained within CDS regions.
Parsing new file...
Removed 903 rows with non-standard chromosomes.
Transformed 274854 junction records.


  0%|          | 4/1876 [00:08<1:06:25,  2.13s/it]

Found 11393 junctions contained within CDS regions.
Parsing new file...
Removed 1158 rows with non-standard chromosomes.
Transformed 303449 junction records.


  0%|          | 5/1876 [00:10<1:07:12,  2.16s/it]

Found 8532 junctions contained within CDS regions.
Parsing new file...
Removed 797 rows with non-standard chromosomes.
Transformed 272826 junction records.


  0%|          | 6/1876 [00:12<1:06:39,  2.14s/it]

Found 11530 junctions contained within CDS regions.
Parsing new file...
Removed 1401 rows with non-standard chromosomes.
Transformed 429798 junction records.


  0%|          | 7/1876 [00:15<1:13:57,  2.37s/it]

Found 19533 junctions contained within CDS regions.
Parsing new file...
Removed 1689 rows with non-standard chromosomes.
Transformed 274252 junction records.


  0%|          | 8/1876 [00:17<1:11:09,  2.29s/it]

Found 11662 junctions contained within CDS regions.
Parsing new file...
Removed 1071 rows with non-standard chromosomes.
Transformed 286807 junction records.


  0%|          | 9/1876 [00:19<1:09:12,  2.22s/it]

Found 13559 junctions contained within CDS regions.
Parsing new file...
Removed 3154 rows with non-standard chromosomes.
Transformed 277697 junction records.


  1%|          | 10/1876 [00:21<1:07:56,  2.18s/it]

Found 11014 junctions contained within CDS regions.
Parsing new file...
Removed 518 rows with non-standard chromosomes.
Transformed 242223 junction records.


  1%|          | 11/1876 [00:23<1:06:00,  2.12s/it]

Found 7248 junctions contained within CDS regions.
Parsing new file...
Removed 716 rows with non-standard chromosomes.
Transformed 259781 junction records.


  1%|          | 12/1876 [00:25<1:04:09,  2.06s/it]

Found 7208 junctions contained within CDS regions.
Parsing new file...
Removed 681 rows with non-standard chromosomes.
Transformed 293495 junction records.


  1%|          | 13/1876 [00:27<1:05:27,  2.11s/it]

Found 8172 junctions contained within CDS regions.
Parsing new file...
Removed 1329 rows with non-standard chromosomes.
Transformed 287014 junction records.


  1%|          | 14/1876 [00:30<1:06:17,  2.14s/it]

Found 12506 junctions contained within CDS regions.
Parsing new file...
Removed 1941 rows with non-standard chromosomes.
Transformed 302737 junction records.


  1%|          | 15/1876 [00:32<1:07:27,  2.17s/it]

Found 9540 junctions contained within CDS regions.
Parsing new file...
Removed 1580 rows with non-standard chromosomes.
Transformed 293511 junction records.


  1%|          | 16/1876 [00:34<1:07:44,  2.19s/it]

Found 11544 junctions contained within CDS regions.
Parsing new file...
Removed 1204 rows with non-standard chromosomes.
Transformed 262175 junction records.


  1%|          | 17/1876 [00:36<1:06:29,  2.15s/it]

Found 9127 junctions contained within CDS regions.
Parsing new file...
Removed 824 rows with non-standard chromosomes.
Transformed 249921 junction records.


  1%|          | 18/1876 [00:38<1:04:50,  2.09s/it]

Found 7318 junctions contained within CDS regions.
Parsing new file...
Removed 1209 rows with non-standard chromosomes.
Transformed 241282 junction records.


  1%|          | 19/1876 [00:40<1:03:13,  2.04s/it]

Found 6259 junctions contained within CDS regions.
Parsing new file...
Removed 1286 rows with non-standard chromosomes.
Transformed 283791 junction records.


  1%|          | 20/1876 [00:42<1:04:27,  2.08s/it]

Found 11898 junctions contained within CDS regions.
Parsing new file...
Removed 1347 rows with non-standard chromosomes.
Transformed 272400 junction records.


  1%|          | 21/1876 [00:44<1:04:47,  2.10s/it]

Found 11974 junctions contained within CDS regions.
Parsing new file...
Removed 862 rows with non-standard chromosomes.
Transformed 275701 junction records.


  1%|          | 22/1876 [00:47<1:05:02,  2.10s/it]

Found 7942 junctions contained within CDS regions.
Parsing new file...
Removed 938 rows with non-standard chromosomes.
Transformed 280869 junction records.


  1%|          | 23/1876 [00:49<1:05:44,  2.13s/it]

Found 8670 junctions contained within CDS regions.
Parsing new file...
Removed 951 rows with non-standard chromosomes.
Transformed 271904 junction records.


  1%|▏         | 24/1876 [00:51<1:05:24,  2.12s/it]

Found 10116 junctions contained within CDS regions.
Parsing new file...
Removed 1781 rows with non-standard chromosomes.
Transformed 300001 junction records.


  1%|▏         | 25/1876 [00:53<1:06:33,  2.16s/it]

Found 12210 junctions contained within CDS regions.
Parsing new file...
Removed 1024 rows with non-standard chromosomes.
Transformed 284127 junction records.


  1%|▏         | 26/1876 [00:55<1:06:47,  2.17s/it]

Found 13564 junctions contained within CDS regions.
Parsing new file...
Removed 1069 rows with non-standard chromosomes.
Transformed 251705 junction records.


  1%|▏         | 27/1876 [00:57<1:04:58,  2.11s/it]

Found 9606 junctions contained within CDS regions.
Parsing new file...
Removed 1162 rows with non-standard chromosomes.
Transformed 337030 junction records.


  1%|▏         | 28/1876 [01:00<1:07:52,  2.20s/it]

Found 14563 junctions contained within CDS regions.
Parsing new file...
Removed 1311 rows with non-standard chromosomes.
Transformed 263331 junction records.


  2%|▏         | 29/1876 [01:02<1:06:30,  2.16s/it]

Found 9907 junctions contained within CDS regions.
Parsing new file...
Removed 1274 rows with non-standard chromosomes.
Transformed 287924 junction records.


  2%|▏         | 30/1876 [01:04<1:06:09,  2.15s/it]

Found 13164 junctions contained within CDS regions.
Parsing new file...
Removed 1715 rows with non-standard chromosomes.
Transformed 245919 junction records.


  2%|▏         | 31/1876 [01:06<1:03:57,  2.08s/it]

Found 6687 junctions contained within CDS regions.
Parsing new file...
Removed 1253 rows with non-standard chromosomes.
Transformed 293715 junction records.


  2%|▏         | 32/1876 [01:08<1:04:29,  2.10s/it]

Found 15321 junctions contained within CDS regions.
Parsing new file...
Removed 1065 rows with non-standard chromosomes.
Transformed 281064 junction records.


  2%|▏         | 33/1876 [01:10<1:04:56,  2.11s/it]

Found 10786 junctions contained within CDS regions.
Parsing new file...
Removed 1720 rows with non-standard chromosomes.
Transformed 280563 junction records.


  2%|▏         | 34/1876 [01:12<1:04:50,  2.11s/it]

Found 7060 junctions contained within CDS regions.
Parsing new file...
Removed 994 rows with non-standard chromosomes.
Transformed 318599 junction records.


  2%|▏         | 35/1876 [01:15<1:08:03,  2.22s/it]

Found 10874 junctions contained within CDS regions.
Parsing new file...
Removed 1520 rows with non-standard chromosomes.
Transformed 303811 junction records.


  2%|▏         | 36/1876 [01:17<1:08:53,  2.25s/it]

Found 13454 junctions contained within CDS regions.
Parsing new file...
Removed 840 rows with non-standard chromosomes.
Transformed 266151 junction records.


  2%|▏         | 37/1876 [01:19<1:07:11,  2.19s/it]

Found 10037 junctions contained within CDS regions.
Parsing new file...
Removed 2209 rows with non-standard chromosomes.
Transformed 307516 junction records.


  2%|▏         | 38/1876 [01:21<1:07:57,  2.22s/it]

Found 10051 junctions contained within CDS regions.
Parsing new file...
Removed 512 rows with non-standard chromosomes.
Transformed 225968 junction records.


  2%|▏         | 39/1876 [01:23<1:04:42,  2.11s/it]

Found 4968 junctions contained within CDS regions.
Parsing new file...
Removed 976 rows with non-standard chromosomes.
Transformed 254960 junction records.


  2%|▏         | 40/1876 [01:25<1:03:22,  2.07s/it]

Found 6415 junctions contained within CDS regions.
Parsing new file...
Removed 1245 rows with non-standard chromosomes.
Transformed 322124 junction records.


  2%|▏         | 41/1876 [01:27<1:06:30,  2.17s/it]

Found 13113 junctions contained within CDS regions.
Parsing new file...
Removed 1021 rows with non-standard chromosomes.
Transformed 278340 junction records.


  2%|▏         | 42/1876 [01:30<1:05:59,  2.16s/it]

Found 10972 junctions contained within CDS regions.
Parsing new file...
Removed 4913 rows with non-standard chromosomes.
Transformed 455598 junction records.


  2%|▏         | 43/1876 [01:33<1:16:13,  2.50s/it]

Found 27419 junctions contained within CDS regions.
Parsing new file...
Removed 634 rows with non-standard chromosomes.
Transformed 286211 junction records.


  2%|▏         | 44/1876 [01:35<1:14:32,  2.44s/it]

Found 6722 junctions contained within CDS regions.
Parsing new file...
Removed 1355 rows with non-standard chromosomes.
Transformed 300077 junction records.


  2%|▏         | 45/1876 [01:37<1:12:24,  2.37s/it]

Found 17538 junctions contained within CDS regions.
Parsing new file...
Removed 722 rows with non-standard chromosomes.
Transformed 252979 junction records.


  2%|▏         | 46/1876 [01:39<1:08:59,  2.26s/it]

Found 8730 junctions contained within CDS regions.
Parsing new file...
Removed 3616 rows with non-standard chromosomes.
Transformed 436556 junction records.


  3%|▎         | 47/1876 [01:42<1:15:23,  2.47s/it]

Found 21751 junctions contained within CDS regions.
Parsing new file...
Removed 763 rows with non-standard chromosomes.
Transformed 231604 junction records.


  3%|▎         | 48/1876 [01:44<1:10:54,  2.33s/it]

Found 7050 junctions contained within CDS regions.
Parsing new file...
Removed 692 rows with non-standard chromosomes.
Transformed 275664 junction records.


  3%|▎         | 49/1876 [01:46<1:08:03,  2.24s/it]

Found 9278 junctions contained within CDS regions.
Parsing new file...
Removed 2733 rows with non-standard chromosomes.
Transformed 436244 junction records.


  3%|▎         | 50/1876 [01:50<1:16:38,  2.52s/it]

Found 24046 junctions contained within CDS regions.
Parsing new file...
Removed 704 rows with non-standard chromosomes.
Transformed 283788 junction records.


  3%|▎         | 51/1876 [01:52<1:13:42,  2.42s/it]

Found 8001 junctions contained within CDS regions.
Parsing new file...
Removed 729 rows with non-standard chromosomes.
Transformed 277716 junction records.


  3%|▎         | 52/1876 [01:54<1:10:46,  2.33s/it]

Found 9140 junctions contained within CDS regions.
Parsing new file...
Removed 1452 rows with non-standard chromosomes.
Transformed 308731 junction records.


  3%|▎         | 53/1876 [01:56<1:10:22,  2.32s/it]

Found 13987 junctions contained within CDS regions.
Parsing new file...
Removed 1405 rows with non-standard chromosomes.
Transformed 240484 junction records.


  3%|▎         | 54/1876 [01:58<1:06:49,  2.20s/it]

Found 5335 junctions contained within CDS regions.
Parsing new file...
Removed 861 rows with non-standard chromosomes.
Transformed 273951 junction records.


  3%|▎         | 55/1876 [02:00<1:05:45,  2.17s/it]

Found 6854 junctions contained within CDS regions.
Parsing new file...
Removed 2156 rows with non-standard chromosomes.
Transformed 362800 junction records.


  3%|▎         | 56/1876 [02:03<1:13:28,  2.42s/it]

Found 13200 junctions contained within CDS regions.
Parsing new file...
Removed 827 rows with non-standard chromosomes.
Transformed 252001 junction records.


  3%|▎         | 57/1876 [02:05<1:09:12,  2.28s/it]

Found 11945 junctions contained within CDS regions.
Parsing new file...
Removed 1721 rows with non-standard chromosomes.
Transformed 293845 junction records.


  3%|▎         | 58/1876 [02:07<1:09:11,  2.28s/it]

Found 7942 junctions contained within CDS regions.
Parsing new file...
Removed 1710 rows with non-standard chromosomes.
Transformed 302640 junction records.


  3%|▎         | 59/1876 [02:10<1:09:39,  2.30s/it]

Found 12862 junctions contained within CDS regions.
Parsing new file...
Removed 1536 rows with non-standard chromosomes.
Transformed 285577 junction records.


  3%|▎         | 60/1876 [02:12<1:08:54,  2.28s/it]

Found 13375 junctions contained within CDS regions.
Parsing new file...
Removed 1656 rows with non-standard chromosomes.
Transformed 270616 junction records.


  3%|▎         | 61/1876 [02:14<1:07:03,  2.22s/it]

Found 8769 junctions contained within CDS regions.
Parsing new file...
Removed 1262 rows with non-standard chromosomes.
Transformed 283769 junction records.


  3%|▎         | 62/1876 [02:16<1:06:34,  2.20s/it]

Found 11360 junctions contained within CDS regions.
Parsing new file...
Removed 1235 rows with non-standard chromosomes.
Transformed 298579 junction records.


  3%|▎         | 63/1876 [02:19<1:06:53,  2.21s/it]

Found 11272 junctions contained within CDS regions.
Parsing new file...
Removed 846 rows with non-standard chromosomes.
Transformed 292376 junction records.


  3%|▎         | 64/1876 [02:21<1:06:30,  2.20s/it]

Found 10680 junctions contained within CDS regions.
Parsing new file...
Removed 405 rows with non-standard chromosomes.
Transformed 225089 junction records.


  3%|▎         | 65/1876 [02:23<1:03:19,  2.10s/it]

Found 5062 junctions contained within CDS regions.
Parsing new file...
Removed 1894 rows with non-standard chromosomes.
Transformed 309666 junction records.


  4%|▎         | 66/1876 [02:25<1:05:02,  2.16s/it]

Found 16453 junctions contained within CDS regions.
Parsing new file...
Removed 537 rows with non-standard chromosomes.
Transformed 252777 junction records.


  4%|▎         | 67/1876 [02:27<1:03:24,  2.10s/it]

Found 6550 junctions contained within CDS regions.
Parsing new file...
Removed 2168 rows with non-standard chromosomes.
Transformed 330024 junction records.


  4%|▎         | 68/1876 [02:29<1:05:52,  2.19s/it]

Found 22664 junctions contained within CDS regions.
Parsing new file...
Removed 686 rows with non-standard chromosomes.
Transformed 277524 junction records.


  4%|▎         | 69/1876 [02:31<1:05:28,  2.17s/it]

Found 7759 junctions contained within CDS regions.
Parsing new file...
Removed 751 rows with non-standard chromosomes.
Transformed 240438 junction records.


  4%|▎         | 70/1876 [02:33<1:02:37,  2.08s/it]

Found 6656 junctions contained within CDS regions.
Parsing new file...
Removed 804 rows with non-standard chromosomes.
Transformed 265315 junction records.


  4%|▍         | 71/1876 [02:35<1:02:04,  2.06s/it]

Found 10272 junctions contained within CDS regions.
Parsing new file...
Removed 490 rows with non-standard chromosomes.
Transformed 251942 junction records.


  4%|▍         | 72/1876 [02:37<1:01:07,  2.03s/it]

Found 7296 junctions contained within CDS regions.
Parsing new file...
Removed 805 rows with non-standard chromosomes.
Transformed 251966 junction records.


  4%|▍         | 73/1876 [02:39<1:00:22,  2.01s/it]

Found 6449 junctions contained within CDS regions.
Parsing new file...
Removed 1290 rows with non-standard chromosomes.
Transformed 228843 junction records.


  4%|▍         | 74/1876 [02:41<59:25,  1.98s/it]  

Found 5973 junctions contained within CDS regions.
Parsing new file...
Removed 857 rows with non-standard chromosomes.
Transformed 269389 junction records.


  4%|▍         | 75/1876 [02:43<1:00:07,  2.00s/it]

Found 10222 junctions contained within CDS regions.
Parsing new file...
Removed 1228 rows with non-standard chromosomes.
Transformed 261371 junction records.


  4%|▍         | 76/1876 [02:45<1:00:51,  2.03s/it]

Found 10591 junctions contained within CDS regions.
Parsing new file...
Removed 725 rows with non-standard chromosomes.
Transformed 231349 junction records.


  4%|▍         | 77/1876 [02:47<59:32,  1.99s/it]  

Found 4608 junctions contained within CDS regions.
Parsing new file...
Removed 596 rows with non-standard chromosomes.
Transformed 249122 junction records.


  4%|▍         | 78/1876 [02:49<59:41,  1.99s/it]

Found 7959 junctions contained within CDS regions.
Parsing new file...
Removed 1341 rows with non-standard chromosomes.
Transformed 281855 junction records.


  4%|▍         | 79/1876 [02:51<1:01:59,  2.07s/it]

Found 12401 junctions contained within CDS regions.
Parsing new file...
Removed 1502 rows with non-standard chromosomes.
Transformed 403525 junction records.


  4%|▍         | 80/1876 [02:54<1:08:10,  2.28s/it]

Found 15581 junctions contained within CDS regions.
Parsing new file...
Removed 1308 rows with non-standard chromosomes.
Transformed 299123 junction records.


  4%|▍         | 81/1876 [02:56<1:08:22,  2.29s/it]

Found 21719 junctions contained within CDS regions.
Parsing new file...
Removed 1484 rows with non-standard chromosomes.
Transformed 289299 junction records.


  4%|▍         | 82/1876 [02:59<1:08:25,  2.29s/it]

Found 11405 junctions contained within CDS regions.
Parsing new file...
Removed 1953 rows with non-standard chromosomes.
Transformed 271428 junction records.


  4%|▍         | 83/1876 [03:01<1:07:08,  2.25s/it]

Found 9434 junctions contained within CDS regions.
Parsing new file...
Removed 1379 rows with non-standard chromosomes.
Transformed 277957 junction records.


  4%|▍         | 84/1876 [03:03<1:06:03,  2.21s/it]

Found 13059 junctions contained within CDS regions.
Parsing new file...
Removed 1120 rows with non-standard chromosomes.
Transformed 265468 junction records.


  5%|▍         | 85/1876 [03:05<1:05:04,  2.18s/it]

Found 13481 junctions contained within CDS regions.
Parsing new file...
Removed 1278 rows with non-standard chromosomes.
Transformed 291399 junction records.


  5%|▍         | 86/1876 [03:07<1:05:43,  2.20s/it]

Found 11637 junctions contained within CDS regions.
Parsing new file...
Removed 1093 rows with non-standard chromosomes.
Transformed 289649 junction records.


  5%|▍         | 87/1876 [03:10<1:06:19,  2.22s/it]

Found 11383 junctions contained within CDS regions.
Parsing new file...
Removed 1216 rows with non-standard chromosomes.
Transformed 300629 junction records.


  5%|▍         | 88/1876 [03:12<1:06:35,  2.23s/it]

Found 11760 junctions contained within CDS regions.
Parsing new file...
Removed 922 rows with non-standard chromosomes.
Transformed 249746 junction records.


  5%|▍         | 89/1876 [03:14<1:04:27,  2.16s/it]

Found 6546 junctions contained within CDS regions.
Parsing new file...
Removed 419 rows with non-standard chromosomes.
Transformed 262211 junction records.


  5%|▍         | 90/1876 [03:16<1:03:30,  2.13s/it]

Found 5872 junctions contained within CDS regions.
Parsing new file...
Removed 813 rows with non-standard chromosomes.
Transformed 294308 junction records.


  5%|▍         | 91/1876 [03:18<1:04:36,  2.17s/it]

Found 9630 junctions contained within CDS regions.
Parsing new file...
Removed 421 rows with non-standard chromosomes.
Transformed 239491 junction records.


  5%|▍         | 92/1876 [03:20<1:02:32,  2.10s/it]

Found 5167 junctions contained within CDS regions.
Parsing new file...
Removed 1769 rows with non-standard chromosomes.
Transformed 306876 junction records.


  5%|▍         | 93/1876 [03:22<1:03:55,  2.15s/it]

Found 12439 junctions contained within CDS regions.
Parsing new file...
Removed 1214 rows with non-standard chromosomes.
Transformed 257513 junction records.


  5%|▌         | 94/1876 [03:24<1:03:14,  2.13s/it]

Found 9964 junctions contained within CDS regions.
Parsing new file...
Removed 1085 rows with non-standard chromosomes.
Transformed 282948 junction records.


  5%|▌         | 95/1876 [03:27<1:03:01,  2.12s/it]

Found 10961 junctions contained within CDS regions.
Parsing new file...
Removed 523 rows with non-standard chromosomes.
Transformed 283082 junction records.


  5%|▌         | 96/1876 [03:29<1:03:26,  2.14s/it]

Found 6790 junctions contained within CDS regions.
Parsing new file...
Removed 691 rows with non-standard chromosomes.
Transformed 269135 junction records.


  5%|▌         | 97/1876 [03:31<1:02:37,  2.11s/it]

Found 7805 junctions contained within CDS regions.
Parsing new file...
Removed 1009 rows with non-standard chromosomes.
Transformed 301814 junction records.


  5%|▌         | 98/1876 [03:34<1:09:23,  2.34s/it]

Found 8911 junctions contained within CDS regions.
Parsing new file...
Removed 1174 rows with non-standard chromosomes.
Transformed 277729 junction records.


  5%|▌         | 99/1876 [03:36<1:11:14,  2.41s/it]

Found 10219 junctions contained within CDS regions.
Parsing new file...
Removed 940 rows with non-standard chromosomes.
Transformed 267430 junction records.


  5%|▌         | 100/1876 [03:39<1:13:05,  2.47s/it]

Found 10271 junctions contained within CDS regions.
Parsing new file...
Removed 1036 rows with non-standard chromosomes.
Transformed 318750 junction records.


  5%|▌         | 101/1876 [03:41<1:13:06,  2.47s/it]

Found 14798 junctions contained within CDS regions.
Parsing new file...
Removed 923 rows with non-standard chromosomes.
Transformed 288649 junction records.


  5%|▌         | 102/1876 [03:44<1:11:28,  2.42s/it]

Found 13213 junctions contained within CDS regions.
Parsing new file...
Removed 851 rows with non-standard chromosomes.
Transformed 242063 junction records.


  5%|▌         | 103/1876 [03:46<1:08:50,  2.33s/it]

Found 9468 junctions contained within CDS regions.
Parsing new file...
Removed 1194 rows with non-standard chromosomes.
Transformed 275552 junction records.


  6%|▌         | 104/1876 [03:48<1:09:28,  2.35s/it]

Found 11242 junctions contained within CDS regions.
Parsing new file...
Removed 2212 rows with non-standard chromosomes.
Transformed 260527 junction records.


  6%|▌         | 105/1876 [03:50<1:06:12,  2.24s/it]

Found 10923 junctions contained within CDS regions.
Parsing new file...
Removed 541 rows with non-standard chromosomes.
Transformed 263484 junction records.


  6%|▌         | 106/1876 [03:52<1:04:45,  2.20s/it]

Found 5287 junctions contained within CDS regions.
Parsing new file...
Removed 993 rows with non-standard chromosomes.
Transformed 290475 junction records.


  6%|▌         | 107/1876 [03:55<1:07:48,  2.30s/it]

Found 11730 junctions contained within CDS regions.
Parsing new file...
Removed 756 rows with non-standard chromosomes.
Transformed 257268 junction records.


  6%|▌         | 108/1876 [03:57<1:07:21,  2.29s/it]

Found 7112 junctions contained within CDS regions.
Parsing new file...
Removed 1151 rows with non-standard chromosomes.
Transformed 285625 junction records.


  6%|▌         | 109/1876 [03:59<1:08:38,  2.33s/it]

Found 10493 junctions contained within CDS regions.
Parsing new file...
Removed 853 rows with non-standard chromosomes.
Transformed 303232 junction records.


  6%|▌         | 110/1876 [04:02<1:09:50,  2.37s/it]

Found 9921 junctions contained within CDS regions.
Parsing new file...
Removed 973 rows with non-standard chromosomes.
Transformed 281823 junction records.


  6%|▌         | 111/1876 [04:04<1:09:45,  2.37s/it]

Found 11264 junctions contained within CDS regions.
Parsing new file...
Removed 1184 rows with non-standard chromosomes.
Transformed 303502 junction records.


  6%|▌         | 112/1876 [04:07<1:08:53,  2.34s/it]

Found 11417 junctions contained within CDS regions.
Parsing new file...
Removed 824 rows with non-standard chromosomes.
Transformed 287813 junction records.


  6%|▌         | 113/1876 [04:09<1:07:57,  2.31s/it]

Found 11422 junctions contained within CDS regions.
Parsing new file...
Removed 2115 rows with non-standard chromosomes.
Transformed 305680 junction records.


  6%|▌         | 114/1876 [04:11<1:07:23,  2.29s/it]

Found 8963 junctions contained within CDS regions.
Parsing new file...
Removed 1666 rows with non-standard chromosomes.
Transformed 296623 junction records.


  6%|▌         | 115/1876 [04:13<1:07:16,  2.29s/it]

Found 9618 junctions contained within CDS regions.
Parsing new file...
Removed 1123 rows with non-standard chromosomes.
Transformed 291868 junction records.


  6%|▌         | 116/1876 [04:16<1:06:34,  2.27s/it]

Found 10093 junctions contained within CDS regions.
Parsing new file...
Removed 934 rows with non-standard chromosomes.
Transformed 274038 junction records.


  6%|▌         | 117/1876 [04:18<1:05:09,  2.22s/it]

Found 7634 junctions contained within CDS regions.
Parsing new file...
Removed 630 rows with non-standard chromosomes.
Transformed 229980 junction records.


  6%|▋         | 118/1876 [04:20<1:01:44,  2.11s/it]

Found 6320 junctions contained within CDS regions.
Parsing new file...
Removed 1573 rows with non-standard chromosomes.
Transformed 319682 junction records.


  6%|▋         | 119/1876 [04:22<1:03:56,  2.18s/it]

Found 12777 junctions contained within CDS regions.
Parsing new file...
Removed 575 rows with non-standard chromosomes.
Transformed 252733 junction records.


  6%|▋         | 120/1876 [04:24<1:02:09,  2.12s/it]

Found 8311 junctions contained within CDS regions.
Parsing new file...
Removed 2022 rows with non-standard chromosomes.
Transformed 305338 junction records.


  6%|▋         | 121/1876 [04:26<1:03:10,  2.16s/it]

Found 10549 junctions contained within CDS regions.
Parsing new file...
Removed 1640 rows with non-standard chromosomes.
Transformed 306692 junction records.


  7%|▋         | 122/1876 [04:29<1:05:43,  2.25s/it]

Found 19226 junctions contained within CDS regions.
Parsing new file...
Removed 671 rows with non-standard chromosomes.
Transformed 270306 junction records.


  7%|▋         | 123/1876 [04:31<1:04:38,  2.21s/it]

Found 9744 junctions contained within CDS regions.
Parsing new file...
Removed 1096 rows with non-standard chromosomes.
Transformed 268584 junction records.


  7%|▋         | 124/1876 [04:33<1:03:48,  2.19s/it]

Found 9600 junctions contained within CDS regions.
Parsing new file...
Removed 466 rows with non-standard chromosomes.
Transformed 258806 junction records.


  7%|▋         | 125/1876 [04:35<1:03:13,  2.17s/it]

Found 6020 junctions contained within CDS regions.
Parsing new file...
Removed 1257 rows with non-standard chromosomes.
Transformed 297576 junction records.


  7%|▋         | 126/1876 [04:37<1:04:21,  2.21s/it]

Found 15940 junctions contained within CDS regions.
Parsing new file...
Removed 901 rows with non-standard chromosomes.
Transformed 275126 junction records.


  7%|▋         | 127/1876 [04:39<1:04:18,  2.21s/it]

Found 15172 junctions contained within CDS regions.
Parsing new file...
Removed 790 rows with non-standard chromosomes.
Transformed 261760 junction records.


  7%|▋         | 128/1876 [04:42<1:03:03,  2.16s/it]

Found 12948 junctions contained within CDS regions.
Parsing new file...
Removed 3192 rows with non-standard chromosomes.
Transformed 312095 junction records.


  7%|▋         | 129/1876 [04:44<1:05:25,  2.25s/it]

Found 14620 junctions contained within CDS regions.
Parsing new file...
Removed 1001 rows with non-standard chromosomes.
Transformed 281954 junction records.


  7%|▋         | 130/1876 [04:46<1:05:43,  2.26s/it]

Found 9177 junctions contained within CDS regions.
Parsing new file...
Removed 508 rows with non-standard chromosomes.
Transformed 293219 junction records.


  7%|▋         | 131/1876 [04:48<1:05:34,  2.25s/it]

Found 6619 junctions contained within CDS regions.
Parsing new file...
Removed 296 rows with non-standard chromosomes.
Transformed 203694 junction records.


  7%|▋         | 132/1876 [04:50<1:01:28,  2.11s/it]

Found 3375 junctions contained within CDS regions.
Parsing new file...
Removed 1452 rows with non-standard chromosomes.
Transformed 270648 junction records.


  7%|▋         | 133/1876 [04:53<1:02:33,  2.15s/it]

Found 10229 junctions contained within CDS regions.
Parsing new file...
Removed 805 rows with non-standard chromosomes.
Transformed 259705 junction records.


  7%|▋         | 134/1876 [04:55<1:03:06,  2.17s/it]

Found 8020 junctions contained within CDS regions.
Parsing new file...
Removed 1006 rows with non-standard chromosomes.
Transformed 275323 junction records.


  7%|▋         | 135/1876 [04:57<1:03:58,  2.20s/it]

Found 11130 junctions contained within CDS regions.
Parsing new file...
Removed 973 rows with non-standard chromosomes.
Transformed 267948 junction records.


  7%|▋         | 136/1876 [04:59<1:04:17,  2.22s/it]

Found 10110 junctions contained within CDS regions.
Parsing new file...
Removed 1236 rows with non-standard chromosomes.
Transformed 299056 junction records.


  7%|▋         | 137/1876 [05:02<1:05:49,  2.27s/it]

Found 12662 junctions contained within CDS regions.
Parsing new file...
Removed 972 rows with non-standard chromosomes.
Transformed 307421 junction records.


  7%|▋         | 138/1876 [05:04<1:06:39,  2.30s/it]

Found 11131 junctions contained within CDS regions.
Parsing new file...
Removed 734 rows with non-standard chromosomes.
Transformed 300785 junction records.


  7%|▋         | 139/1876 [05:06<1:06:46,  2.31s/it]

Found 8975 junctions contained within CDS regions.
Parsing new file...
Removed 728 rows with non-standard chromosomes.
Transformed 275334 junction records.


  7%|▋         | 140/1876 [05:08<1:05:09,  2.25s/it]

Found 10091 junctions contained within CDS regions.
Parsing new file...
Removed 752 rows with non-standard chromosomes.
Transformed 273258 junction records.


  8%|▊         | 141/1876 [05:11<1:04:04,  2.22s/it]

Found 10006 junctions contained within CDS regions.
Parsing new file...
Removed 1181 rows with non-standard chromosomes.
Transformed 280581 junction records.


  8%|▊         | 142/1876 [05:13<1:03:46,  2.21s/it]

Found 11897 junctions contained within CDS regions.
Parsing new file...
Removed 1309 rows with non-standard chromosomes.
Transformed 293707 junction records.


  8%|▊         | 143/1876 [05:16<1:10:28,  2.44s/it]

Found 8393 junctions contained within CDS regions.
Parsing new file...
Removed 1054 rows with non-standard chromosomes.
Transformed 278280 junction records.


  8%|▊         | 144/1876 [05:19<1:13:20,  2.54s/it]

Found 11408 junctions contained within CDS regions.
Parsing new file...
Removed 1439 rows with non-standard chromosomes.
Transformed 290026 junction records.


  8%|▊         | 145/1876 [05:21<1:14:25,  2.58s/it]

Found 12733 junctions contained within CDS regions.
Parsing new file...
Removed 1120 rows with non-standard chromosomes.
Transformed 331005 junction records.


  8%|▊         | 146/1876 [05:24<1:16:03,  2.64s/it]

Found 13079 junctions contained within CDS regions.
Parsing new file...
Removed 738 rows with non-standard chromosomes.
Transformed 258917 junction records.


  8%|▊         | 147/1876 [05:26<1:12:43,  2.52s/it]

Found 6463 junctions contained within CDS regions.
Parsing new file...
Removed 930 rows with non-standard chromosomes.
Transformed 255914 junction records.


  8%|▊         | 148/1876 [05:28<1:09:01,  2.40s/it]

Found 10562 junctions contained within CDS regions.
Parsing new file...
Removed 1000 rows with non-standard chromosomes.
Transformed 292248 junction records.


  8%|▊         | 149/1876 [05:31<1:08:28,  2.38s/it]

Found 8248 junctions contained within CDS regions.
Parsing new file...
Removed 1110 rows with non-standard chromosomes.
Transformed 278285 junction records.


  8%|▊         | 150/1876 [05:33<1:08:08,  2.37s/it]

Found 11307 junctions contained within CDS regions.
Parsing new file...
Removed 1033 rows with non-standard chromosomes.
Transformed 271107 junction records.


  8%|▊         | 151/1876 [05:35<1:06:24,  2.31s/it]

Found 9264 junctions contained within CDS regions.
Parsing new file...
Removed 1581 rows with non-standard chromosomes.
Transformed 225523 junction records.


  8%|▊         | 152/1876 [05:37<1:03:02,  2.19s/it]

Found 6718 junctions contained within CDS regions.
Parsing new file...
Removed 1379 rows with non-standard chromosomes.
Transformed 272868 junction records.


  8%|▊         | 153/1876 [05:39<1:02:39,  2.18s/it]

Found 8730 junctions contained within CDS regions.
Parsing new file...
Removed 1125 rows with non-standard chromosomes.
Transformed 255666 junction records.


  8%|▊         | 154/1876 [05:41<1:01:59,  2.16s/it]

Found 10172 junctions contained within CDS regions.
Parsing new file...
Removed 572 rows with non-standard chromosomes.
Transformed 250319 junction records.


  8%|▊         | 155/1876 [05:43<1:00:29,  2.11s/it]

Found 12918 junctions contained within CDS regions.
Parsing new file...
Removed 1226 rows with non-standard chromosomes.
Transformed 247209 junction records.


  8%|▊         | 156/1876 [05:45<59:36,  2.08s/it]  

Found 6487 junctions contained within CDS regions.
Parsing new file...
Removed 853 rows with non-standard chromosomes.
Transformed 258856 junction records.


  8%|▊         | 157/1876 [05:47<59:36,  2.08s/it]

Found 6723 junctions contained within CDS regions.
Parsing new file...
Removed 1336 rows with non-standard chromosomes.
Transformed 298713 junction records.


  8%|▊         | 158/1876 [05:50<1:02:24,  2.18s/it]

Found 9556 junctions contained within CDS regions.
Parsing new file...
Removed 548 rows with non-standard chromosomes.
Transformed 249011 junction records.


  8%|▊         | 159/1876 [05:52<1:01:19,  2.14s/it]

Found 5589 junctions contained within CDS regions.
Parsing new file...
Removed 937 rows with non-standard chromosomes.
Transformed 260086 junction records.


  9%|▊         | 160/1876 [05:54<1:00:43,  2.12s/it]

Found 7954 junctions contained within CDS regions.
Parsing new file...
Removed 1069 rows with non-standard chromosomes.
Transformed 295546 junction records.


  9%|▊         | 161/1876 [05:56<1:02:16,  2.18s/it]

Found 9576 junctions contained within CDS regions.
Parsing new file...
Removed 810 rows with non-standard chromosomes.
Transformed 283068 junction records.


  9%|▊         | 162/1876 [05:59<1:02:31,  2.19s/it]

Found 7205 junctions contained within CDS regions.
Parsing new file...
Removed 2361 rows with non-standard chromosomes.
Transformed 251334 junction records.


  9%|▊         | 163/1876 [06:01<1:01:24,  2.15s/it]

Found 6131 junctions contained within CDS regions.
Parsing new file...
Removed 1022 rows with non-standard chromosomes.
Transformed 298223 junction records.


  9%|▊         | 164/1876 [06:03<1:02:46,  2.20s/it]

Found 11616 junctions contained within CDS regions.
Parsing new file...
Removed 671 rows with non-standard chromosomes.
Transformed 242799 junction records.


  9%|▉         | 165/1876 [06:05<1:01:03,  2.14s/it]

Found 7482 junctions contained within CDS regions.
Parsing new file...
Removed 691 rows with non-standard chromosomes.
Transformed 246440 junction records.


  9%|▉         | 166/1876 [06:07<1:00:14,  2.11s/it]

Found 7762 junctions contained within CDS regions.
Parsing new file...
Removed 939 rows with non-standard chromosomes.
Transformed 246897 junction records.


  9%|▉         | 167/1876 [06:09<59:16,  2.08s/it]  

Found 9215 junctions contained within CDS regions.
Parsing new file...
Removed 1658 rows with non-standard chromosomes.
Transformed 305817 junction records.


  9%|▉         | 168/1876 [06:11<1:01:41,  2.17s/it]

Found 14182 junctions contained within CDS regions.
Parsing new file...
Removed 1058 rows with non-standard chromosomes.
Transformed 276644 junction records.


  9%|▉         | 169/1876 [06:14<1:02:08,  2.18s/it]

Found 14170 junctions contained within CDS regions.
Parsing new file...
Removed 1138 rows with non-standard chromosomes.
Transformed 286287 junction records.


  9%|▉         | 170/1876 [06:16<1:02:28,  2.20s/it]

Found 11672 junctions contained within CDS regions.
Parsing new file...
Removed 421 rows with non-standard chromosomes.
Transformed 226835 junction records.


  9%|▉         | 171/1876 [06:18<1:00:00,  2.11s/it]

Found 5144 junctions contained within CDS regions.
Parsing new file...
Removed 1009 rows with non-standard chromosomes.
Transformed 281456 junction records.


  9%|▉         | 172/1876 [06:20<1:01:11,  2.15s/it]

Found 11169 junctions contained within CDS regions.
Parsing new file...
Removed 3285 rows with non-standard chromosomes.
Transformed 435112 junction records.


  9%|▉         | 173/1876 [06:23<1:08:56,  2.43s/it]

Found 23353 junctions contained within CDS regions.
Parsing new file...
Removed 1398 rows with non-standard chromosomes.
Transformed 334129 junction records.


  9%|▉         | 174/1876 [06:26<1:10:25,  2.48s/it]

Found 13777 junctions contained within CDS regions.
Parsing new file...
Removed 968 rows with non-standard chromosomes.
Transformed 289600 junction records.


  9%|▉         | 175/1876 [06:28<1:08:10,  2.40s/it]

Found 9851 junctions contained within CDS regions.
Parsing new file...
Removed 1192 rows with non-standard chromosomes.
Transformed 287939 junction records.


  9%|▉         | 176/1876 [06:30<1:08:49,  2.43s/it]

Found 10887 junctions contained within CDS regions.
Parsing new file...
Removed 607 rows with non-standard chromosomes.
Transformed 246391 junction records.


  9%|▉         | 177/1876 [06:32<1:05:20,  2.31s/it]

Found 4512 junctions contained within CDS regions.
Parsing new file...
Removed 1310 rows with non-standard chromosomes.
Transformed 244138 junction records.


  9%|▉         | 178/1876 [06:34<1:02:33,  2.21s/it]

Found 6342 junctions contained within CDS regions.
Parsing new file...
Removed 1971 rows with non-standard chromosomes.
Transformed 291345 junction records.


 10%|▉         | 179/1876 [06:37<1:02:23,  2.21s/it]

Found 13758 junctions contained within CDS regions.
Parsing new file...
Removed 938 rows with non-standard chromosomes.
Transformed 301610 junction records.


 10%|▉         | 180/1876 [06:39<1:03:28,  2.25s/it]

Found 10008 junctions contained within CDS regions.
Parsing new file...
Removed 1202 rows with non-standard chromosomes.
Transformed 272446 junction records.


 10%|▉         | 181/1876 [06:41<1:02:46,  2.22s/it]

Found 9531 junctions contained within CDS regions.
Parsing new file...
Removed 830 rows with non-standard chromosomes.
Transformed 272064 junction records.


 10%|▉         | 182/1876 [06:43<1:02:31,  2.21s/it]

Found 9357 junctions contained within CDS regions.
Parsing new file...
Removed 1114 rows with non-standard chromosomes.
Transformed 305897 junction records.


 10%|▉         | 183/1876 [06:46<1:03:14,  2.24s/it]

Found 14165 junctions contained within CDS regions.
Parsing new file...
Removed 4333 rows with non-standard chromosomes.
Transformed 405456 junction records.


 10%|▉         | 184/1876 [06:49<1:12:15,  2.56s/it]

Found 15718 junctions contained within CDS regions.
Parsing new file...
Removed 1140 rows with non-standard chromosomes.
Transformed 284994 junction records.


 10%|▉         | 185/1876 [06:51<1:12:40,  2.58s/it]

Found 13116 junctions contained within CDS regions.
Parsing new file...
Removed 1101 rows with non-standard chromosomes.
Transformed 281541 junction records.


 10%|▉         | 186/1876 [06:54<1:10:45,  2.51s/it]

Found 9592 junctions contained within CDS regions.
Parsing new file...
Removed 1650 rows with non-standard chromosomes.
Transformed 347087 junction records.


 10%|▉         | 187/1876 [06:57<1:13:47,  2.62s/it]

Found 23012 junctions contained within CDS regions.
Parsing new file...
Removed 896 rows with non-standard chromosomes.
Transformed 282553 junction records.


 10%|█         | 188/1876 [06:59<1:10:05,  2.49s/it]

Found 10976 junctions contained within CDS regions.
Parsing new file...
Removed 1455 rows with non-standard chromosomes.
Transformed 288530 junction records.


 10%|█         | 189/1876 [07:01<1:08:50,  2.45s/it]

Found 12155 junctions contained within CDS regions.
Parsing new file...
Removed 963 rows with non-standard chromosomes.
Transformed 268920 junction records.


 10%|█         | 190/1876 [07:04<1:07:49,  2.41s/it]

Found 7343 junctions contained within CDS regions.
Parsing new file...
Removed 927 rows with non-standard chromosomes.
Transformed 287792 junction records.


 10%|█         | 191/1876 [07:06<1:07:53,  2.42s/it]

Found 12969 junctions contained within CDS regions.
Parsing new file...
Removed 1007 rows with non-standard chromosomes.
Transformed 260081 junction records.


 10%|█         | 192/1876 [07:08<1:04:38,  2.30s/it]

Found 9057 junctions contained within CDS regions.
Parsing new file...
Removed 631 rows with non-standard chromosomes.
Transformed 291570 junction records.


 10%|█         | 193/1876 [07:10<1:03:53,  2.28s/it]

Found 8565 junctions contained within CDS regions.
Parsing new file...
Removed 687 rows with non-standard chromosomes.
Transformed 251977 junction records.


 10%|█         | 194/1876 [07:12<1:02:01,  2.21s/it]

Found 7634 junctions contained within CDS regions.
Parsing new file...
Removed 1613 rows with non-standard chromosomes.
Transformed 255140 junction records.


 10%|█         | 195/1876 [07:14<59:57,  2.14s/it]  

Found 7530 junctions contained within CDS regions.
Parsing new file...
Removed 1305 rows with non-standard chromosomes.
Transformed 322539 junction records.


 10%|█         | 196/1876 [07:17<1:03:02,  2.25s/it]

Found 13798 junctions contained within CDS regions.
Parsing new file...
Removed 1376 rows with non-standard chromosomes.
Transformed 281937 junction records.


 11%|█         | 197/1876 [07:19<1:03:08,  2.26s/it]

Found 13235 junctions contained within CDS regions.
Parsing new file...
Removed 1333 rows with non-standard chromosomes.
Transformed 298123 junction records.


 11%|█         | 198/1876 [07:21<1:03:32,  2.27s/it]

Found 11728 junctions contained within CDS regions.
Parsing new file...
Removed 1502 rows with non-standard chromosomes.
Transformed 257846 junction records.


 11%|█         | 199/1876 [07:24<1:02:38,  2.24s/it]

Found 5411 junctions contained within CDS regions.
Parsing new file...
Removed 1114 rows with non-standard chromosomes.
Transformed 323185 junction records.


 11%|█         | 200/1876 [07:26<1:04:22,  2.30s/it]

Found 10295 junctions contained within CDS regions.
Parsing new file...
Removed 1190 rows with non-standard chromosomes.
Transformed 306664 junction records.


 11%|█         | 201/1876 [07:28<1:04:52,  2.32s/it]

Found 14402 junctions contained within CDS regions.
Parsing new file...
Removed 460 rows with non-standard chromosomes.
Transformed 228577 junction records.


 11%|█         | 202/1876 [07:30<1:01:00,  2.19s/it]

Found 4762 junctions contained within CDS regions.
Parsing new file...
Removed 517 rows with non-standard chromosomes.
Transformed 244721 junction records.


 11%|█         | 203/1876 [07:32<59:07,  2.12s/it]  

Found 6202 junctions contained within CDS regions.
Parsing new file...
Removed 533 rows with non-standard chromosomes.
Transformed 262973 junction records.


 11%|█         | 204/1876 [07:35<1:01:32,  2.21s/it]

Found 8981 junctions contained within CDS regions.
Parsing new file...
Removed 1572 rows with non-standard chromosomes.
Transformed 294732 junction records.


 11%|█         | 205/1876 [07:37<1:02:00,  2.23s/it]

Found 18764 junctions contained within CDS regions.
Parsing new file...
Removed 947 rows with non-standard chromosomes.
Transformed 262494 junction records.


 11%|█         | 206/1876 [07:39<1:00:48,  2.18s/it]

Found 9184 junctions contained within CDS regions.
Parsing new file...
Removed 947 rows with non-standard chromosomes.
Transformed 253166 junction records.


 11%|█         | 207/1876 [07:41<59:28,  2.14s/it]  

Found 7848 junctions contained within CDS regions.
Parsing new file...
Removed 1074 rows with non-standard chromosomes.
Transformed 266280 junction records.


 11%|█         | 208/1876 [07:43<59:46,  2.15s/it]

Found 9276 junctions contained within CDS regions.
Parsing new file...
Removed 1416 rows with non-standard chromosomes.
Transformed 281273 junction records.


 11%|█         | 209/1876 [07:46<1:01:10,  2.20s/it]

Found 9524 junctions contained within CDS regions.
Parsing new file...
Removed 1051 rows with non-standard chromosomes.
Transformed 319451 junction records.


 11%|█         | 210/1876 [07:48<1:04:38,  2.33s/it]

Found 9999 junctions contained within CDS regions.
Parsing new file...
Removed 1680 rows with non-standard chromosomes.
Transformed 305380 junction records.


 11%|█         | 211/1876 [07:50<1:04:31,  2.33s/it]

Found 15201 junctions contained within CDS regions.
Parsing new file...
Removed 1077 rows with non-standard chromosomes.
Transformed 266759 junction records.


 11%|█▏        | 212/1876 [07:53<1:02:22,  2.25s/it]

Found 10052 junctions contained within CDS regions.
Parsing new file...
Removed 511 rows with non-standard chromosomes.
Transformed 249516 junction records.


 11%|█▏        | 213/1876 [07:55<1:00:12,  2.17s/it]

Found 8371 junctions contained within CDS regions.
Parsing new file...
Removed 1774 rows with non-standard chromosomes.
Transformed 316507 junction records.


 11%|█▏        | 214/1876 [07:57<1:02:06,  2.24s/it]

Found 19814 junctions contained within CDS regions.
Parsing new file...
Removed 1220 rows with non-standard chromosomes.
Transformed 266127 junction records.


 11%|█▏        | 215/1876 [07:59<1:01:22,  2.22s/it]

Found 8634 junctions contained within CDS regions.
Parsing new file...
Removed 1462 rows with non-standard chromosomes.
Transformed 287750 junction records.


 12%|█▏        | 216/1876 [08:01<1:00:46,  2.20s/it]

Found 12793 junctions contained within CDS regions.
Parsing new file...
Removed 410 rows with non-standard chromosomes.
Transformed 233055 junction records.


 12%|█▏        | 217/1876 [08:03<58:41,  2.12s/it]  

Found 4380 junctions contained within CDS regions.
Parsing new file...
Removed 778 rows with non-standard chromosomes.
Transformed 288338 junction records.


 12%|█▏        | 218/1876 [08:05<59:29,  2.15s/it]

Found 10126 junctions contained within CDS regions.
Parsing new file...
Removed 1257 rows with non-standard chromosomes.
Transformed 282476 junction records.


 12%|█▏        | 219/1876 [08:08<59:47,  2.17s/it]

Found 12611 junctions contained within CDS regions.
Parsing new file...
Removed 1191 rows with non-standard chromosomes.
Transformed 280522 junction records.


 12%|█▏        | 220/1876 [08:10<59:37,  2.16s/it]

Found 12724 junctions contained within CDS regions.
Parsing new file...
Removed 770 rows with non-standard chromosomes.
Transformed 296467 junction records.


 12%|█▏        | 221/1876 [08:12<1:03:17,  2.29s/it]

Found 6307 junctions contained within CDS regions.
Parsing new file...
Removed 957 rows with non-standard chromosomes.
Transformed 252380 junction records.


 12%|█▏        | 222/1876 [08:15<1:02:19,  2.26s/it]

Found 10433 junctions contained within CDS regions.
Parsing new file...
Removed 1134 rows with non-standard chromosomes.
Transformed 280929 junction records.


 12%|█▏        | 223/1876 [08:17<1:06:17,  2.41s/it]

Found 10664 junctions contained within CDS regions.
Parsing new file...
Removed 1666 rows with non-standard chromosomes.
Transformed 277648 junction records.


 12%|█▏        | 224/1876 [08:20<1:05:35,  2.38s/it]

Found 13499 junctions contained within CDS regions.
Parsing new file...
Removed 740 rows with non-standard chromosomes.
Transformed 277211 junction records.


 12%|█▏        | 225/1876 [08:22<1:05:35,  2.38s/it]

Found 11340 junctions contained within CDS regions.
Parsing new file...
Removed 532 rows with non-standard chromosomes.
Transformed 234758 junction records.


 12%|█▏        | 226/1876 [08:24<1:01:19,  2.23s/it]

Found 9310 junctions contained within CDS regions.
Parsing new file...
Removed 3346 rows with non-standard chromosomes.
Transformed 404217 junction records.


 12%|█▏        | 227/1876 [08:27<1:08:34,  2.50s/it]

Found 23529 junctions contained within CDS regions.
Parsing new file...
Removed 1003 rows with non-standard chromosomes.
Transformed 274708 junction records.


 12%|█▏        | 228/1876 [08:29<1:06:22,  2.42s/it]

Found 14085 junctions contained within CDS regions.
Parsing new file...
Removed 785 rows with non-standard chromosomes.
Transformed 281048 junction records.


 12%|█▏        | 229/1876 [08:31<1:04:39,  2.36s/it]

Found 9000 junctions contained within CDS regions.
Parsing new file...
Removed 1257 rows with non-standard chromosomes.
Transformed 261014 junction records.


 12%|█▏        | 230/1876 [08:34<1:03:13,  2.30s/it]

Found 9454 junctions contained within CDS regions.
Parsing new file...
Removed 1899 rows with non-standard chromosomes.
Transformed 307999 junction records.


 12%|█▏        | 231/1876 [08:36<1:04:10,  2.34s/it]

Found 17799 junctions contained within CDS regions.
Parsing new file...
Removed 812 rows with non-standard chromosomes.
Transformed 231909 junction records.


 12%|█▏        | 232/1876 [08:38<1:01:23,  2.24s/it]

Found 4710 junctions contained within CDS regions.
Parsing new file...
Removed 421 rows with non-standard chromosomes.
Transformed 264961 junction records.


 12%|█▏        | 233/1876 [08:40<1:00:10,  2.20s/it]

Found 5863 junctions contained within CDS regions.
Parsing new file...
Removed 1092 rows with non-standard chromosomes.
Transformed 259064 junction records.


 12%|█▏        | 234/1876 [08:42<59:24,  2.17s/it]  

Found 10932 junctions contained within CDS regions.
Parsing new file...
Removed 653 rows with non-standard chromosomes.
Transformed 254214 junction records.


 13%|█▎        | 235/1876 [08:44<58:22,  2.13s/it]

Found 10257 junctions contained within CDS regions.
Parsing new file...
Removed 1380 rows with non-standard chromosomes.
Transformed 319404 junction records.


 13%|█▎        | 236/1876 [08:47<1:01:09,  2.24s/it]

### 5. Summarize Exitron Info 
- Lists all unique exitrons and their counts

- Identifies which exitrons are already annotated

### 6. Exitron Normalization
- Divides the exitron score by the reads of the surrounding exons to find the proportion of the time that the exitron gets expressed