## PEGG 2.0

- Updates to PEGG:
    - more flexible data input (following PRIDICT/PrimeDesign format)
    - more flexible PAM sequence searching
    - inclusion of base editing sensor module
    - fixing INS/DEL error
    - G+19 instead of G+20
    - Better fetching of MIT specificity and Rule Set 2/3 information...
    - Automated generation of WT/edited sequence generation
        - Use this to determine errors in pegRNAs...
    - Improved distance to nick and homology overhang sizing
    - Also automate finding of silent variants for MMR-evasion...



In [48]:
import numpy as np
import regex as re
import pandas as pd
import matplotlib.pyplot as plt

## improved PAM finder for various PAM sequences

In [64]:
substitution_example = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(GGC/TTG)AGAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'
ins_example = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGGGA(/GTAA)GAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'
del_eaxmple = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGGGAG(AGAC/)CCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'

In [76]:

#introduce error messages for each of these

start = substitution_example.find("(")

end = substitution_example.find(")")

replace_seq = substitution_example[start:end+1]
#if '/' in replace_seq:
    #if '+' in replace_seq:
        #throw an error

loc_replace = replace_seq.find('/')
wt_replace = replace_seq[1:loc_replace]
mut_replace = replace_seq[loc_replace+1:-1]

wt_seq = substitution_example[:start] + wt_replace + substitution_example[end+1:]

wt_start = start
wt_end = start+len(wt_replace)


In [109]:
replace_seq = '(/AGGG)'
loc_replace = replace_seq.find('/')
wt_replace = replace_seq[1:loc_replace]
mut_replace = replace_seq[loc_replace+1:-1]
wt_replace

''

In [85]:

context_size = 50

wt_w_context = wt_seq[wt_start-context_size:wt_start] + wt_seq[wt_start:wt_end] + wt_seq[wt_end:wt_end+context_size]



103

In [113]:
def mut_formatter(pridict_format, context_size = 60):
    """ 
    Takes in mutations in format of AAA(AA/GC)ATAGC
    and converts it into a format that allows pegRNAs to be generated

    Parameters
    _____
    pridict_format = list of sequences in e.g. AAA(AA/GC)ATAGC format
    context_size = amount of nt on either side of mutation to select
    """

    original_start = []
    original_end = []

    wt = []
    mut = []
    
    wt_context = []
    mut_context = []

    left_context_length = []
    right_context_length = []

    replace_start = []
    replace_end = []

    for k in pridict_format:
        #find mutation
        start = k.find("(")
        end = k.find(")")

        #get out the mutant and WT allele
        replace_seq = k[start:end+1]
        loc_replace = replace_seq.find('/')
        wt_replace = replace_seq[1:loc_replace]
        mut_replace = replace_seq[loc_replace+1:-1]

        wt.append(wt_replace)
        mut.append(mut_replace)

        #generate full WT sequence
        wt_seq = substitution_example[:start] + wt_replace + substitution_example[end+1:]
        wt_start = start
        wt_end = start+len(wt_replace)
        original_start.append(wt_start)
        original_end.append(wt_end)

        #and just the subset
        left_context = wt_seq[wt_start-context_size:wt_start]
        print(left_context)
        right_context = wt_seq[wt_end:wt_end+context_size]

        wt_w_context = left_context + wt_replace + right_context
        mut_w_context = left_context + mut_replace + right_context

        wt_context.append(wt_w_context)
        mut_context.append(mut_w_context)

        left_context_length.append(len(left_context))
        right_context_length.append(len(right_context))

        replace_start.append(len(left_context))
        replace_end.append(len(left_context)+len(wt_replace))

    col_labels = ['Original_start', 'Original_end', 'WT', 'Mutant', 'WT_context', 'Mutant_context', 'Replace_start', 'Replace_end', 'Left_context_length', 'Right_context_length']
    cols = [original_start, original_end, wt, mut, wt_context, mut_context, replace_start, replace_end, left_context_length, right_context_length]

    df = pd.DataFrame(dict(zip(col_labels, cols)))

    return df

        





In [114]:
substitution_example = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(GGC/TTG)AGAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'
ins_example = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGGGA(/GTAA)GAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'
del_example = 'CACACCTACACTGCTCGAAGTAAATATGCGAAGCGCGCGGCCTGGCCGGAGGCGTTCCGCGCCGCCACGTGTTCGTTAACTGTTGATTGGTGGCACATAAGCAATCGTAGTCCGTCAAATTCAGCTCTGTTATCCCGGGCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGGGAG(AGAC/)CCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAACAAGTCGATGCAGGCTCCCGTCTTTGAAAAGGGGTAAACATACAAGTGGATAGATGATGGGTAGGGGCCTCCAATACATCCAACACTCTACGCCCTCTCCAAGAGCTAGAAGGGCACCCTGCAGTTGGAAAGGG'

pridict_format = [substitution_example, ins_example, del_example]

df = mut_formatter(pridict_format)

GCGTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG
GTTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(G
TTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(GG


In [112]:
print(df.iloc[2]['WT_context'])
print(df.iloc[2]['Mutant_context'])

TTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(GGAGACGAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAAC
TTATGTGTCAAATGGCGTAGAACGGGATTGACTGTTTGACGGTAGCTGCTGAGGCGG(GGGAGACCCTCCGTCGGGCTATGTCACTAATACTTTCCAAACGCCCCGTACCGATGCTGAAC


In [None]:
def PAM_finder(WT_seq, mut_start, mut_end,  PAM, RTT_length_max):
    """Identifies the location of PAM sequences on the + and - strand.
    Returns a 2-d array containing marked locations of PAM sequence start locations on + and - strand.
    
    Parameters
    ----------
    WT_seq: WT sequence...

    PAM_seq: PAM sequence to search for...

    RTT_length_max: maximum size of RTT length for searching...
    
    """

    distance_PAM_to_nick = 3
    PAM_size = len(PAM)
    mut_size = mut_start-mut_end

    search_size = RTT_length_max - distance_PAM_to_nick

    #deal with extra PAM sequences later on...
    #and other potential issues
    plus_search = WT_seq[mut_end-search_size-5 : mut_start+distance_PAM_to_nick + PAM_size+5]


    #---------------Loading in sequences for PAM Searching------------------#
    
    search_size = RTT_length - len(PAM)-1 #need to modify this for insertion/deletions...
    #size mut doesn't capture the size of insertions, only the size of deletions

    plus_search = seq1[seq_start-1-search_size : seq_end+search_size].upper()
    minus_search = plus_search.complement().upper()
    
    mut_start_idx = 1+search_size
    mut_end_idx = 1+search_size+size_mut #not accurate for insertions; does work for indexing though...

    plus_search1 = plus_search[:mut_start_idx+3+len(PAM)-1]
    minus_search1 = minus_search[mut_start_idx-3-len(PAM):]

    #---------------PAM Searching------------------#

    #replacing N with regex symbol
    PAM_regex = PAM.replace('N', '/*.')

    PAM_search_plus = re.compile('(?=(' + PAM_regex + '))', re.IGNORECASE)

    iterator_plus = PAM_search_plus.finditer(str(plus_search1))
    PAM_starts_plus = [match.start() for match in iterator_plus]


    PAM_minus = PAM[::-1]#reversing it
    PAM_regex_minus = PAM_minus.replace('N', '/*.')

    PAM_search_minus = re.compile('(?=(' + PAM_regex_minus + '))', re.IGNORECASE)

    iterator_minus = PAM_search_minus.finditer(str(minus_search1))
    PAM_starts_minus = [match.start() for match in iterator_minus]
    #since things are flipped on minus strand, adding len(PAM) to get the true "start" to the PAM

    PAM_starts_minus = np.asarray(PAM_starts_minus) + len(PAM) + (mut_start_idx-3-len(PAM))#and correct for indexing

    return np.asarray([np.asarray(PAM_starts_plus), PAM_starts_minus], dtype='object')-mut_start_idx

In [47]:
def PAM_finder(WT_seq, mut_start, mut_end, PAM, RTT_length_max):
    """Identifies the location of PAM sequences on the + and - strand.
    Returns a 2-d array containing marked locations of PAM sequence start locations on + and - strand.
    
    Parameters
    ----------
    WT_seq: WT sequence...

    PAM_seq: PAM sequence to search for...

    RTT_length_max: maximum size of RTT length for searching...
    
    """

    #---------------Loading in sequences for PAM Searching------------------#
    
    search_size = RTT_length - len(PAM)-1 #need to modify this for insertion/deletions...
    #size mut doesn't capture the size of insertions, only the size of deletions

    plus_search = seq1[seq_start-1-search_size : seq_end+search_size].upper()
    minus_search = plus_search.complement().upper()
    
    mut_start_idx = 1+search_size
    mut_end_idx = 1+search_size+size_mut #not accurate for insertions; does work for indexing though...

    plus_search1 = plus_search[:mut_start_idx+3+len(PAM)-1]
    minus_search1 = minus_search[mut_start_idx-3-len(PAM):]

    #---------------PAM Searching------------------#

    #replacing N with regex symbol
    PAM_regex = PAM.replace('N', '/*.')

    PAM_search_plus = re.compile('(?=(' + PAM_regex + '))', re.IGNORECASE)

    iterator_plus = PAM_search_plus.finditer(str(plus_search1))
    PAM_starts_plus = [match.start() for match in iterator_plus]


    PAM_minus = PAM[::-1]#reversing it
    PAM_regex_minus = PAM_minus.replace('N', '/*.')

    PAM_search_minus = re.compile('(?=(' + PAM_regex_minus + '))', re.IGNORECASE)

    iterator_minus = PAM_search_minus.finditer(str(minus_search1))
    PAM_starts_minus = [match.start() for match in iterator_minus]
    #since things are flipped on minus strand, adding len(PAM) to get the true "start" to the PAM

    PAM_starts_minus = np.asarray(PAM_starts_minus) + len(PAM) + (mut_start_idx-3-len(PAM))#and correct for indexing

    return np.asarray([np.asarray(PAM_starts_plus), PAM_starts_minus], dtype='object')-mut_start_idx

In [None]:
PAM = 'NGG'

PAM_finder(WT_seq, mut_start, mut_end, PAM, RTT_length_max)

# Generating list of variants for alvin

- Starting with diego library
    - includes SNPs in IDR regions
    - Also intronic SNPs between exons that are an IDR
        - These wont be amenable to MMR-evasive silent edits

- Need WT sequnece
- Mutant sequence
- Frame (i.e. codon frame = 0,1,2)

In [122]:
import pegg
import gffutils

In [120]:
filepath = '/Users/samgould/Desktop/FSR Lab/reference files/GRCh37/ncbi-genomes-2022-03-17/GCF_000001405.25_GRCh37.p13_genomic.fna.gz'

records, index_list = pegg.genome_loader(filepath)

In [123]:
file = '/Users/samgould/Desktop/FSR Lab/reference files/gencode_v19.db'
db = gffutils.FeatureDB(file)

In [116]:
#diego variants
idr = pd.read_csv('filtered_idr_mutations_5count_6nt_indels.csv')
idr.keys()

Index(['COUNT', 'Hugo_Symbol', 'Entrez_Gene_Id', 'Center', 'NCBI_Build',
       'Chromosome', 'Start_Position', 'End_Position', 'Strand', 'Consequence',
       'Variant_Classification', 'Variant_Type', 'Reference_Allele',
       'Tumor_Seq_Allele1', 'Tumor_Seq_Allele2', 'dbSNP_RS',
       'dbSNP_Val_Status', 'Tumor_Sample_Barcode',
       'Matched_Norm_Sample_Barcode', 'Match_Norm_Seq_Allele1',
       'Match_Norm_Seq_Allele2', 'Tumor_Validation_Allele1',
       'Tumor_Validation_Allele2', 'Match_Norm_Validation_Allele1',
       'Match_Norm_Validation_Allele2', 'Verification_Status',
       'Validation_Status', 'Mutation_Status', 'Sequencing_Phase',
       'Sequence_Source', 'Validation_Method', 'Score', 'BAM_File',
       'Sequencer', 't_ref_count', 't_alt_count', 'n_ref_count', 'n_alt_count',
       'HGVSc', 'HGVSp', 'HGVSp_Short', 'Transcript_ID', 'RefSeq',
       'Protein_position', 'Codons', 'Exon_Number', 'gnomAD_AF',
       'gnomAD_AFR_AF', 'gnomAD_AMR_AF', 'gnomAD_ASJ_AF', 'gnom

In [118]:
np.unique(idr['Variant_Type'], return_counts=True)

(array(['DEL', 'DNP', 'INS', 'ONP', 'SNP'], dtype=object),
 array([ 729,   42,  361,   13, 7955]))

In [137]:
tx_new = []
for i, val in idr.iterrows():
    h = val['HGVSc']
    tx_new.append(h.split(':')[0])

In [139]:
idr['transcript_id_TRUE'] = tx_new

In [141]:
#check all transcripts are annotated

t_ids = np.unique(idr['transcript_id_TRUE'])

not_found = []

for tx in t_ids:

    cds = list(db.children(tx, order_by='+end', featuretype=['CDS']))
    start_end_cds = [[i.start, i.end] for i in cds]
    if len(start_end_cds)==0:
        not_found.append(tx)

In [136]:
tx = 'ENST00000379607.5'

cds = list(db.children(tx, order_by='+end', featuretype=['CDS']))
start_end_cds = [[i.start, i.end] for i in cds]


[[20146427, 20146429],
 [20148634, 20148725],
 [20150300, 20150381],
 [20152075, 20152125],
 [20153856, 20153959],
 [20156657, 20156740],
 [20159743, 20159758]]

In [163]:
records[index_list[21]]

SeqRecord(seq=Seq('NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...NNN'), id='NC_000022.10', name='NC_000022.10', description='NC_000022.10 Homo sapiens chromosome 22, GRCh37.p13 Primary Assembly', dbxrefs=[])

In [173]:
context_size = 60


wt_w_context = []
alt_w_context = []

seq_start = []
seq_end = []

for i, val in idr.iterrows():
    vt = val['Variant_Type']
    s = val['Start_Position']
    e = val['End_Position']
    ref = val['Reference_Allele']
    alt = val['Tumor_Seq_Allele2']
    chrom = val['Chromosome']

    if chrom == 'X':
        chrom = 22
    elif chrom=='Y':
        chrom = 23
    else:
        chrom = int(chrom)-1

    chr_seq = records[index_list[chrom]].seq.upper()

    if vt in ['SNP', 'ONP', 'DNP']:
        ref = ref
        alt = alt
        #assert ref == chr_seq[s-1:e], print(ref, chr_seq[s-1:e])

    elif vt =='INS':
        ref = ''
        alt = alt

    elif vt=='DEL':
        ref = ref
        alt = ''

    left_context = chr_seq[s-1-context_size:s-1]
    right_context = chr_seq[e:e+context_size]

    wt_seq = left_context + ref + right_context
    alt_seq = left_context + alt + right_context

    wt_w_context.append(wt_seq)
    alt_w_context.append(alt_seq)
    seq_start.append(s-context_size)
    seq_end.append(e+context_size+1)

#based on these, get the frame, calculate mutation consequence, etc...

KeyboardInterrupt: 

In [174]:
#ba

992

In [149]:
ins = idr[idr['Variant_Type']=='INS']
dels = idr[idr['Variant_Type']=='DEL']

In [150]:
ins

Unnamed: 0,COUNT,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_Position,End_Position,Strand,Consequence,Variant_Classification,Variant_Type,Reference_Allele,Tumor_Seq_Allele1,Tumor_Seq_Allele2,dbSNP_RS,dbSNP_Val_Status,Tumor_Sample_Barcode,Matched_Norm_Sample_Barcode,Match_Norm_Seq_Allele1,Match_Norm_Seq_Allele2,Tumor_Validation_Allele1,Tumor_Validation_Allele2,Match_Norm_Validation_Allele1,Match_Norm_Validation_Allele2,...,Transcript_ID,RefSeq,Protein_position,Codons,Exon_Number,gnomAD_AF,gnomAD_AFR_AF,gnomAD_AMR_AF,gnomAD_ASJ_AF,gnomAD_EAS_AF,gnomAD_FIN_AF,gnomAD_NFE_AF,gnomAD_OTH_AF,gnomAD_SAS_AF,FILTER,Polyphen_Prediction,Polyphen_Score,SIFT_Prediction,SIFT_Score,SWISSPROT,n_depth,t_depth,Annotation_Status,mutationInCis_Flag,transcript_id_TRUE
4,762.0,ASXL1,171023.0,DFCI,GRCh37,20,31022441,31022442,+,frameshift_variant,Frame_Shift_Ins,INS,-,-,G,rs756958159,,GENIE-DFCI-003409-1958,,,,,,,,...,ENST00000375687,NM_015338.5,642.0,-/G,13/13,,,,,,,,,,PASS,,,,,,,83.0,SUCCESS,False,ENST00000375687.4
5,645.0,APC,324.0,JHU,GRCh37,5,112175952,112175953,+,frameshift_variant,Frame_Shift_Ins,INS,-,,A,,,GENIE-JHU-00198-00378,,,,,,,,...,ENST00000257430,NM_000038.5,1554.0,gaa/gaAa,16/16,,,,,,,,,,PASS,,,,,,,1268.0,SUCCESS,False,ENST00000257430.4
21,280.0,ARID1A,8289.0,DFCI,GRCh37,1,27105930,27105931,+,frameshift_variant,Frame_Shift_Ins,INS,-,-,G,rs758608743,,GENIE-DFCI-001750-9089,,,,,,,,...,ENST00000324856,NM_006015.4,1847.0,-/G,20/20,,,,,,,,,,PASS,,,,,,,582.0,SUCCESS,False,ENST00000324856.7
23,234.0,SETBP1,26040.0,VICC,GRCh37,18,42456671,42456672,+,intron_variant,Intron,INS,-,-,TCTT,rs33928380,,GENIE-VICC-330134-unk-4,GENIE-VICC-330134-unk-4-Normal,C,C,,,,,...,ENST00000282030,NM_015559.2,,,,,,,,,,,,,PASS,,,,,,294.0,534.0,SUCCESS,False,ENST00000282030.5
55,108.0,TCF7L2,6934.0,DFCI,GRCh37,10,114925316,114925317,+,frameshift_variant,Frame_Shift_Ins,INS,-,-,A,rs769592153,,GENIE-DFCI-151674-1525793,,,,,,,,...,ENST00000355995,,482.0,aga/agAa,15/15,,,,,,,,,,PASS,,,,,,,170.0,SUCCESS,False,ENST00000355995.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9011,5.0,ZRSR2,8233.0,UHN,GRCh37,X,15841255,15841256,+,protein_altering_variant,In_Frame_Ins,INS,-,-,AGCCGC,,,GENIE-UHN-AGI974552-BM1,NORMAL,,,,,,,...,ENST00000307771,NM_005089.3,447.0,agc/aAGCCGCgc,11/11,,,,,,,,,,PASS,,,,,,,819.0,SUCCESS,False,ENST00000307771.7
9013,5.0,TSC1,7248.0,UHN,GRCh37,9,135771988,135771989,+,protein_altering_variant,In_Frame_Ins,INS,-,-,GCT,,,GENIE-UHN-DIVA273992-ARC1,NORMAL,,,,,,,...,ENST00000298552,NM_001162426.1,1043.0,agc/agAGCc,23/23,,,,,,,,,,PASS,,,,,,,326.0,SUCCESS,False,ENST00000298552.3
9033,5.0,SMARCA4,6597.0,VICC,GRCh37,19,11170571,11170572,+,intron_variant,Intron,INS,-,-,G,,,GENIE-VICC-369751-unk-2,GENIE-VICC-369751-unk-2-Normal,C,C,,,,,...,ENST00000344626,NM_003072.3,,,,,,,,,,,,,PASS,,,,,,598.0,70.0,SUCCESS,False,ENST00000344626.4
9092,5.0,PDGFRA,5156.0,UCHI,GRCh37,4,55161224,55161225,+,intron_variant,Intron,INS,-,,G,,,GENIE-UCHI-Patient67-T1,,,,,,,,...,ENST00000257290,NM_006206.4,,,,,,,,,,,,,PASS,,,,,,,3315.0,SUCCESS,False,ENST00000257290.5
