In [1]:
import sys
import argparse
import os
import pandas as pd
import numpy as np
import re

from collections import Counter

# Root prepended to the '/ifs/...' project paths used in this notebook.
# NOTE(review): presumably a local mount of a remote filesystem; it is machine-specific.
pathPrefix = '/Users/friedman/Desktop/mnt'
# Add the utility directory to the import path so the helper modules imported below can be found.
sys.path.append(pathPrefix + '/ifs/work/taylorlab/friedman/myUtils')
import analysis_utils 
import mutationSigUtils 
import maf_analysis_utils
import mutation_modeling_util  
import signature_attribution_util
import clonality_analysis_util

In [None]:
# Load the annotated IMPACT cohort MAF through the project loader.
# nLinesFile is handed to the loader (presumably for its progress display -- see analysis_utils).
allImpactMuts = analysis_utils.load_in_df_with_progress(
    filePath=pathPrefix + '/ifs/work/taylorlab/friedman/hypermutationAnalysisProj/projectDataAndConfigFiles/all_impact_mutations_annotated_cohort.maf',
    nLinesFile=275000)


In [5]:
# Per-mutation allele key "<gene>_<protein change>". Both parts go through str(),
# so a missing protein change yields keys such as 'TERT_nan' instead of an error.
allImpactMuts['allele'] = allImpactMuts.apply(
    lambda mutRow: '_'.join((str(mutRow['Hugo_Symbol']), str(mutRow['HGVSp_Short']))),
    axis=1)


In [4]:
# Annotate every mutation with its four-nucleotide motif via the project helper.
allImpactMuts['quadNuc'] = allImpactMuts.apply(
    lambda mutRow: mutationSigUtils.create_reference_four_nuc(
        mutRow['Ref_Tri'],
        mutRow['Reference_Allele'],
        mutRow['Tumor_Seq_Allele2'],
        mutRow['Variant_Type']),
    axis=1)


In [6]:
# Read the signature table and merge its signature columns with the project helper.
impactSigs = mutationSigUtils.merge_signature_columns(
    pd.read_table(pathPrefix + '/ifs/work/taylorlab/friedman/hypermutationAnalysisProj/projectDataAndConfigFiles/signatures_from_unfiltered_maf.txt'))

# Dominant signature per row, computed from the row's 'mean'-prefixed columns.
impactSigs['dominantSignature'] = impactSigs.apply(
    lambda sigRow: mutationSigUtils.get_dominant_signature(
        sigRow.to_dict(), cols=None, prefix='mean', notEnoughMuts=True),
    axis=1)

# MSI cases: MMR-dominant, or signature-1-dominant with a mutation burden of at least 30/Mb.
casesWithMSISignature = set(impactSigs.loc[
    (impactSigs['dominantSignature'] == 'mean_MMR')
    | ((impactSigs['Nmut_Mb'] >= 30) & (impactSigs['dominantSignature'] == 'mean_1')),
    'Tumor_Sample_Barcode'])

# POLE cases: signature-10-dominant with a burden strictly above 30/Mb.
# NOTE(review): the MSI rule uses >= 30 while this one uses > 30 -- confirm the difference is intended.
casesWithPOLESignature = set(impactSigs.loc[
    (impactSigs['Nmut_Mb'] > 30) & (impactSigs['dominantSignature'] == 'mean_10'),
    'Tumor_Sample_Barcode'])

# TMZ cases: signature-11-dominant, with no burden threshold.
casesWithTMZSignature = set(impactSigs.loc[
    impactSigs['dominantSignature'] == 'mean_11',
    'Tumor_Sample_Barcode'])


In [6]:
def asses_observed_fraction_of_motif_favored_mutations_in_gene(allImpactMuts, gene, motifs, mode='oncogenic'):
    """Print how many of a gene's possible motif-matching alleles were actually observed.

    Reads the simulated "all possible SNPs" MAF for `gene` (located under the
    notebook-level `pathPrefix`), keeps the rows whose 'quadNuc' is in `motifs`,
    and compares the set of possible alleles with the alleles seen for that gene
    in `allImpactMuts`.

    Parameters:
        allImpactMuts: DataFrame with 'Hugo_Symbol', 'quadNuc' and 'allele' columns,
            plus 'oncogenic' or 'is-a-hotspot' depending on `mode`.
        gene: gene symbol; also used to build the simulated MAF file name.
        motifs: collection of quadNuc motifs to restrict both tables to.
        mode: 'oncogenic' keeps rows with a non-null 'oncogenic' value;
            'hotspot' keeps rows whose 'is-a-hotspot' equals 'Y'.

    Prints the gene and, when at least one possible allele exists, the number of
    possible alleles and the observed/possible ratio. Returns None.

    Raises:
        ValueError: if `mode` is not one of the two supported values (previously
            this surfaced as an opaque TypeError from len(None)).
    """
    # Choose the row predicate up front so an unsupported mode fails before any file I/O.
    if mode == 'oncogenic':
        def is_selected(maf):
            return maf['oncogenic'].notnull()
    elif mode == 'hotspot':
        def is_selected(maf):
            return maf['is-a-hotspot'] == 'Y'
    else:
        raise ValueError("mode must be 'oncogenic' or 'hotspot', got %r" % (mode,))

    filePath = '/ifs/work/taylorlab/friedman/myAdjustedDataFiles/simulatedMafs/geneMutMafs/' + gene + '_all_possible_snps_v2.maf'
    geneMafQuadNuc = pd.read_table(pathPrefix + filePath)
    # .copy() so the column assignment below acts on an independent frame, not a slice.
    geneMafQuadNuc = geneMafQuadNuc[geneMafQuadNuc['quadNuc'].isin(motifs)].copy()
    geneMafQuadNuc['allele'] = geneMafQuadNuc.apply(lambda row: str(row['Hugo_Symbol']) + '_' + str(row['HGVSp_Short']), axis=1)

    impactGeneQuadNucMaf = allImpactMuts[(allImpactMuts['Hugo_Symbol'] == gene) & (allImpactMuts['quadNuc'].isin(motifs))]

    # Both frames are already restricted to `motifs`, so only the mode predicate is applied here.
    allPossibleMutAlleles = set(geneMafQuadNuc[is_selected(geneMafQuadNuc)]['allele'])
    allObservedImpactAlleles = set(impactGeneQuadNucMaf[is_selected(impactGeneQuadNucMaf)]['allele'])

    # Single-argument print(...) produces the same output under Python 2 and Python 3.
    print(gene)
    if len(allPossibleMutAlleles) != 0:
        print(len(allPossibleMutAlleles))
        print((1.0 * len(allObservedImpactAlleles)) / len(allPossibleMutAlleles))
    

In [7]:
# Pass the MAF through the project's MLL-gene fix-up helper and rebind the result.
# NOTE(review): presumably this normalizes MLL gene naming -- see maf_analysis_utils.
# The cell rebinds allImpactMuts to its own output, so re-running it feeds the result back in.
allImpactMuts = maf_analysis_utils.fix_mll_genes(allImpactMuts)

In [8]:
# Quad-nucleotide motifs used for the POLE analyses in this notebook.
poleMotifs = {'TCAT', 'TCTG'}

# Sample ids listed as hypermutated endometrial cancers, and their mutations.
endometrialHyperIds = analysis_utils.get_ids_by_hypermutant_status(
    hypermutantIdDir=pathPrefix + '/ifs/work/taylorlab/friedman/hypermutationAnalysisProj/projectDataAndConfigFiles/hypermutationStatusIds',
    cancerType='Endometrial Cancer',
    hypermutantStatus='Hypermutated')
endometrialMuts = allImpactMuts.loc[allImpactMuts['Tumor_Sample_Barcode'].isin(endometrialHyperIds)]

In [9]:
# Hypermutated endometrial mutations restricted to samples in the POLE-signature case set.
endometrialPole = endometrialMuts[endometrialMuts['Tumor_Sample_Barcode'].isin(casesWithPOLESignature)]

**Analysis of mutations at POLE-favored pentanucleotide sites** <br/><br/><br/><br/>

In [108]:
def normalize_dict_to_sum_to_one(d):
    """Rescale the values of `d` in place so they sum to 1.0 and return `d`.

    Parameters:
        d: dict mapping keys to numeric weights or counts.

    Returns:
        The same dict object, with every value multiplied by 1/sum(values).
        An empty dict is returned unchanged (there is nothing to normalize).

    Raises:
        ZeroDivisionError: if `d` is non-empty but its values sum to zero.
    """
    # Empty input used to hit 1.0/0; treat it as already normalized.
    if not d:
        return d
    factor = 1.0 / sum(d.values())
    for k in d:
        d[k] = d[k] * factor
    return d

In [7]:
# All cohort mutations from samples in the POLE-signature case set.
poleMaf = allImpactMuts[allImpactMuts['Tumor_Sample_Barcode'].isin(casesWithPOLESignature)]

In [14]:
# Write the POLE-case MAF as a tab-separated file.
# The path is built from pathPrefix like the other project paths; it resolves to the same location as before.
poleMaf.to_csv(pathPrefix + '/ifs/work/taylorlab/friedman/myAdjustedDataFiles/poleCaseMaf.tsv', index=False, sep='\t')

In [8]:
# POLE-case MAF with pentanucleotide context.
# NOTE(review): no visible cell writes this file; presumably it is produced outside the notebook
# from the poleCaseMaf.tsv export -- confirm the provenance.
# NOTE(review): the recorded output below looks like the tail of a pandas warning
# (possibly a mixed-dtype warning) -- consider passing explicit dtypes.
poleMafWithPentaContext = pd.read_table(pathPrefix + '/ifs/work/taylorlab/friedman/myAdjustedDataFiles/poleCaseMafWithPentanucleotideContext.maf')

  interactivity=interactivity, compiler=compiler, result=result)


In [9]:
# Re-import the helper module so edits to it are picked up (Python 2 builtin reload).
reload(mutationSigUtils)

# Strand-specific pentanucleotide change for each mutation; the context is read from 'Ref_Tri.1'.
poleMafWithPentaContext['pentaChange'] = poleMafWithPentaContext.apply(
    lambda mutRow: mutationSigUtils.create_strand_specific_pentanucleotide_change(
        mutRow['Ref_Tri.1'],
        mutRow['Reference_Allele'],
        mutRow['Tumor_Seq_Allele2'],
        mutRow['Variant_Type']),
    axis=1)


In [10]:
# Key combining the protein change with the pentanucleotide change (the gene symbol is not included).
# The columns are concatenated without str(), so a missing value is not stringified here
# (unlike the 'allele' column built earlier).
poleMafWithPentaContext['alleleAndChange'] = poleMafWithPentaContext['HGVSp_Short'] + '_' + poleMafWithPentaContext['pentaChange']

In [11]:
# Gene symbols treated as tumor suppressors in this notebook; used only for membership tests.
# NOTE(review): the source of this hard-coded list is not recorded here -- document its provenance.
tumorSuppresors = set(['ERRFI1', 'ASXL2', 'PMAIP1', 'ACTG1', 'SUFU', 'FBXO11', 'MEN1', 'FAM58A', 'B2M', 'RB1', 'DUSP22', 'SESN1', 'GPS2', 'RAD51D', 'SMG1', 'CDC73', 'MAP3K1', 'SMARCB1', 'INPP4B', 'PARK2', 'SMAD4', 'CBFB', 'CDH1', 'PPP6C', 'SETDB1', 'SETDB2', 'NF2', 'CDKN2B', 'CDKN2C', 'CDKN2A', 'DDX3X', 'PIK3R1', 'BARD1', 'PDS5B', 'KLF4', 'SPRED1', 'VHL', 'SMAD2', 'PMS1', 'PMS2', 'SETD2', 'GATA3', 'TBL1XR1', 'MUTYH', 'SOCS1', 'FAM175A', 'ROBO1', 'ARID1B', 'ARID1A', 'TCF7L2', 'STK11', 'FOXA1', 'PTEN', 'FAT1', 'FAS', 'CYLD', 'MAX', 'SH2D1A', 'APC', 'NTHL1', 'CTCF', 'KDM5C', 'KMT2C', 'ZFHX3', 'FOXP1', 'PIGA', 'CDKN1B', 'CDKN1A', 'FUBP1', 'MSH2', 'ID3', 'TNFRSF14', 'TRAF3', 'EP400', 'BRIP1', 'ARID4A', 'ARID4B', 'XRCC2', 'DAXX', 'SDHAF2', 'ASXL1', 'AMER1', 'RASA1', 'EGR1', 'MST1', 'SOX17', 'RUNX1', 'PIK3R3', 'NCOR1', 'NF1', 'JAK1', 'PTPRD', 'CHEK2', 'CHEK1', 'SMC1A', 'TMEM127', 'STAG1', 'RAD51', 'TCF3', 'STAG2', 'ARID2', 'RAD50', 'RNF43', 'PARP1', 'BLM', 'CUX1', 'RECQL', 'RAD21', 'PTPN2', 'PTPN1', 'SLX4', 'INHA', 'PAX5', 'IRF1', 'TP53', 'HLA-A', 'IRF8', 'CBL', 'TOP1', 'SHQ1', 'PRDM1', 'NSD1', 'ATXN2', 'CREBBP', 'HDAC4', 'SESN2', 'PPP2R1A', 'EPHA7', 'ATM', 'EPHA3', 'POT1', 'SMAD3', 'MOB3B', 'TBX3', 'POLE', 'ATR', 'FANCD2', 'FH', 'BCORL1', 'SOX9', 'IKZF3', 'TSC1', 'TP63', 'MRE11A', 'SDHC', 'BTG1', 'POLD1', 'CIITA', 'SMC3', 'SAMHD1', 'RTEL1', 'ECT2L', 'PIK3R2', 'CRBN', 'FANCC', 'NBN', 'FANCA', 'HLA-B', 'RECQL4', 'DUSP4', 'ERCC2', 'FBXW7', 'TGFBR2', 'TGFBR1', 'MSH3', 'RBM15', 'TET1', 'TET3', 'SESN3', 'MGA', 'LTB', 'FOXL2', 'SH2B3', 'BCOR', 'HIST1H1D', 'ATRX', 'EP300', 'RAD51C', 'RAD51B', 'HIST1H1B', 'TNFAIP3', 'DICER1', 'ARID5B', 'LATS2', 'FOXO1', 'KEAP1', 'EZH2', 'SP140', 'NKX3-1', 'PBRM1', 'PALB2', 'CIC', 'BRCA1', 'DTX1', 'FLCN', 'SPEN', 'CD58', 'ERCC3', 'ERCC4', 'MSH6', 'BCL11B', 'BMPR1A', 'ERF', 'BRCA2', 'NOTCH2', 'EED', 'MITF', 'ELF3', 'SMARCA4', 'BBC3', 'ANKRD11', 'CEBPA', 'BCL2L11', 'AXIN2', 'AXIN1', 'CDK12', 'ESCO2', 'MLH1', 'SDHB', 'MED12', 'HNF1A', 'RYBP', 
'ATP6V1B2', 'DNMT3B', 'KMT2B', 'KMT2A', 'DNMT3A', 'NFKBIA', 'TRAF5', 'KMT2D', 'SPOP', 'RBM10', 'P2RY8', 'TP53BP1', 'TSC2', 'KDM6A', 'EPCAM', 'PHOX2B', 'NPM1', 'BCL10', 'LATS1', 'HOXB13', 'ARID3A', 'PTPRT', 'PTPRS', 'INPPL1', 'NOTCH4', 'TET2', 'NOTCH1', 'CASP8', 'NOTCH3', 'GRIN2A', 'MAP2K4', 'WT1', 'BACH2', 'SDHA', 'BAP1', 'PTCH1', 'SDHD'])

In [12]:
# Mutations in genes that are NOT on the tumor-suppressor list.
# NOTE(review): the name suggests oncogenes, but this is simply the complement of
# tumorSuppresors -- confirm that every other gene should be treated as an oncogene.
oncoMaf = poleMafWithPentaContext[~poleMafWithPentaContext['Hugo_Symbol'].isin(tumorSuppresors)]

In [29]:
#print Counter(oncoMaf['pentaChange'])

print oncoMaf.shape[0], poleMafWithPentaContext.shape[0] - oncoMaf.shape[0]

qn = 'TCTG'

print 'oncogenes', oncoMaf[(oncoMaf['quadNuc'] == qn)].shape[0]
print 'tumor suppressors', poleMafWithPentaContext[(poleMafWithPentaContext['quadNuc'] == qn)].shape[0]
              
#print Counter(oncoMaf[(oncoMaf['quadNuc'] == 'TCTG') & (oncoMaf['oncogenic'].isnull())]['allele'])

4860 6839
oncogenes 1265
tumor suppressors 3030


In [120]:
# The analysis below is restricted to stop-gained mutations in tumor-suppressor genes.
poleMafWithPentaContextTSTrunc = poleMafWithPentaContext.loc[
    poleMafWithPentaContext['Hugo_Symbol'].isin(tumorSuppresors)
    & (poleMafWithPentaContext['Consequence'] == 'stop_gained')]

In [130]:
# Relative frequency of each pentanucleotide change among mutations at the two
# POLE quad-nucleotide motifs (TCAT and TCTG), normalized to sum to one.
poleMotifMuts = poleMafWithPentaContext.loc[poleMafWithPentaContext['quadNuc'].isin({'TCAT', 'TCTG'})]
polePentaCounts = list(Counter(poleMotifMuts['pentaChange']).items())
normedPentaProbs = normalize_dict_to_sum_to_one(dict(polePentaCounts))



In [122]:
# Stop-gained tumor-suppressor mutations that fall on the TCAT/TCTG motifs.
poleMafTSTruncPoleMotif = poleMafWithPentaContextTSTrunc[poleMafWithPentaContextTSTrunc['quadNuc'].isin(set(['TCAT', 'TCTG']))]


In [123]:
# (gene, mutation count) pairs, most-mutated gene first.
sortedGeneMuts = sorted(
    Counter(poleMafTSTruncPoleMotif['Hugo_Symbol']).items(),
    key=lambda geneAndCount: -geneAndCount[1])


In [111]:

# NOTE(review): normedPentaProbsFiveMostCommon is not assigned in any visible cell of this
# notebook, so this cell depends on leftover kernel state; unless it is defined elsewhere it
# will raise NameError on a fresh Restart & Run All.
print normedPentaProbsFiveMostCommon
#for gene, nmut in sortedGeneMuts:
#    geneMaf = poleMafWithPentaContextTSTruncCommonMotif[poleMafWithPentaContextTSTruncCommonMotif['Hugo_Symbol'] == gene]
#    alleleCntr = Counter(geneMaf['HGVSp_Short'])
#    print gene, nmut, len(alleleCntr), alleleCntr
#    print '______________'

{'TT(C>T)GA': 0.2613126783530371, 'TT(C>A)TC': 0.20587036282103546, 'TT(C>A)TT': 0.40358744394618834, 'AT(C>T)GA': 0.07664084794129637, 'TT(C>A)AT': 0.052588666938442725}


In [193]:
#Computes the specificity score (number of steps needed to equalize the allele counts)
#and the divergence score (expected vs. observed distribution of mutations)

def compare_nmut_dist(allPentaProbs, observedMutationPentaCounts):
    """Score how far observed pentanucleotide counts deviate from the expected ones.

    The background probabilities are restricted to the pentanucleotide changes that
    were observed and renormalized to sum to one. Each is multiplied by the total
    observed mutation count to get an expected count, and the absolute
    expected-vs-observed differences are summed and divided by that total.

    Parameters:
        allPentaProbs: dict mapping pentanucleotide change -> background probability.
        observedMutationPentaCounts: dict/Counter mapping pentanucleotide change -> observed count.

    Returns:
        The summed absolute deviation divided by the total number of observed mutations.
    """
    seenPentas = observedMutationPentaCounts.keys()
    expectedProbs = normalize_dict_to_sum_to_one(
        dict((penta, prob) for penta, prob in allPentaProbs.items() if penta in seenPentas))

    # Total mutation count across every observed pentanucleotide change.
    totalObserved = sum(observedMutationPentaCounts.values())

    absoluteDeviation = 0
    for penta, prob in expectedProbs.items():
        expectedCount = prob * totalObserved
        absoluteDeviation += abs(expectedCount - observedMutationPentaCounts[penta])

    return (1.0 * absoluteDeviation) / totalObserved
    

def calculate_metrics_for_allele_gene_mutation_specificity(maf, pentaProbs):
    """Build a per-gene table of allele-specificity metrics.

    Parameters:
        maf: DataFrame with 'Hugo_Symbol', 'HGVSp_Short' and 'pentaChange' columns.
        pentaProbs: dict of background pentanucleotide-change probabilities,
            passed through to compare_nmut_dist.

    Returns:
        DataFrame with one row per gene that has at least one mutation, with columns:
        'Hugo_Symbol', 'nMutations', 'nAlleles', 'maxAllele', 'maxAlleleCount',
        'nSteps' (unit moves from the most- to the least-mutated allele needed until
        max and min counts differ by at most one) and 'diffScore'
        (the compare_nmut_dist result for the gene's pentaChange counts).
    """

    # A measure of the unevenness of an allele-count distribution: how many single
    # mutations must be moved from the largest to the smallest count to equalize it.
    # Iterative rather than recursive: the recursive form went one call deeper per
    # step and could exceed the interpreter recursion limit for very skewed counts.
    def count_n_steps_to_equalize(counts, nSteps):
        counts = sorted(counts)
        # Stop once everything is equal or min and max are separated by just one.
        while counts and counts[0] + 1 < counts[-1]:
            counts[0] += 1
            counts[-1] -= 1
            counts.sort()
            nSteps += 1
        return nSteps

    listOfDicts = []
    for gene in set(maf['Hugo_Symbol']):

        geneMaf = maf[maf['Hugo_Symbol'] == gene]
        alleleCounts = Counter(geneMaf['HGVSp_Short'])
        rawCounts = list(alleleCounts.values())
        nMutations = sum(rawCounts)
        nAlleles = len(rawCounts)

        # A key that matches no rows (e.g. a NaN gene symbol, which never compares equal) is skipped.
        if nMutations == 0:
            continue

        # Compute the ranking once and unpack the top (allele, count) pair.
        maxAllele, maxCount = alleleCounts.most_common()[0]
        nSteps = count_n_steps_to_equalize(rawCounts, 0)
        geneCounts = Counter(geneMaf['pentaChange'])
        differenceScore = compare_nmut_dist(pentaProbs, geneCounts)

        listOfDicts.append({'Hugo_Symbol': gene, 'nMutations': nMutations, 'nSteps': nSteps,
                            'nAlleles': nAlleles, 'maxAlleleCount': maxCount, 'maxAllele': maxAllele,
                            'diffScore': differenceScore
                            })

    return pd.DataFrame(listOfDicts)
    

In [139]:
# pentaChange counts for ARID1A only (ad-hoc inspection; no visible cell reads this afterwards).
arid1aCts = Counter(poleMafTSTruncPoleMotif[poleMafTSTruncPoleMotif['Hugo_Symbol'] == 'ARID1A']['pentaChange'])


In [194]:
# Per-gene specificity metrics for stop-gained tumor-suppressor mutations at the POLE motifs,
# scored against the motif-wide pentanucleotide frequencies.
df = calculate_metrics_for_allele_gene_mutation_specificity(poleMafTSTruncPoleMotif, normedPentaProbs)

#gene = 'BRCA2'
#geneCounts = Counter(poleMafTSTruncPoleMotif[poleMafTSTruncPoleMotif['Hugo_Symbol'] == gene]['pentaChange'])
#compare_nmut_dist(normedPentaProbs, geneCounts)

In [207]:
# Label only the notable genes (more than 3 equalizing steps, or more than 12 mutations);
# every other gene gets None.
df['displayName'] = df.apply(
    lambda geneRow: geneRow['Hugo_Symbol']
    if (geneRow['nSteps'] > 3 or geneRow['nMutations'] > 12)
    else None,
    axis=1)

In [208]:
# Export the per-gene metrics as a tab-separated file (presumably for plotting outside the notebook).
# NOTE(review): absolute, machine-specific path that is not derived from pathPrefix.
df.to_csv('/Users/friedman/Desktop/WORK/dataForLocalPlotting/alleleSpecificity.tsv', index=False, sep='\t')

**Analysis done offline (August 22)**<br/><br/><br/>

In [237]:
# All POLE-case mutations at the TCAT/TCTG motifs.
# NOTE(review): this is the same filter that defines poleMotifMuts earlier in the notebook.
poleMafPoleMainOnly = poleMafWithPentaContext[poleMafWithPentaContext['quadNuc'].isin(set(['TCAT', 'TCTG']))]

Counter({'TT(C>A)TT': 990, 'TT(C>T)GA': 641, 'TT(C>A)TC': 505, 'TT(C>T)GG': 349, 'TT(C>T)GT': 338, 'TT(C>T)GC': 222, 'TT(C>A)TG': 215, 'AT(C>A)TT': 207, 'AT(C>T)GA': 188, 'GT(C>T)GA': 168, 'TT(C>A)TA': 149, 'CT(C>T)GA': 141, 'AT(C>T)GT': 139, 'AT(C>A)TC': 136, 'GT(C>T)GT': 134, 'CT(C>T)GG': 128, 'GT(C>A)TT': 128, 'GT(C>T)GG': 117, 'AT(C>T)GG': 112, 'CT(C>T)GT': 105, 'AT(C>T)GC': 93, 'CT(C>T)GC': 84, 'GT(C>T)GC': 71, 'GT(C>A)TC': 68, 'CT(C>A)TT': 66, 'AT(C>A)TA': 58, 'AT(C>A)TG': 58, 'CT(C>A)TC': 41, 'GT(C>A)TG': 34, 'CT(C>A)TG': 26, 'GT(C>A)TA': 13, 'CT(C>A)TA': 12})


In [248]:
# Stop-gained mutations at the POLE motifs. Take an explicit copy so the column added
# below is written to an independent frame instead of a slice of poleMafPoleMainOnly
# (the slice assignment is what triggers pandas' SettingWithCopyWarning).
poleMafTruncOnly = poleMafPoleMainOnly[poleMafPoleMainOnly['Consequence'] == 'stop_gained'].copy()
# Allele key "<gene>_<protein change>".
poleMafTruncOnly['allele'] = poleMafTruncOnly.apply(lambda row: row['Hugo_Symbol'] + '_' + row['HGVSp_Short'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [260]:
# One record per distinct truncating allele: its recurrence count, plus the gene and
# pentanucleotide change taken from the first matching row.
listOfDicts = []
for allele in set(poleMafTruncOnly['allele']):
    alleleMaf = poleMafTruncOnly.loc[poleMafTruncOnly['allele'] == allele]
    gene = alleleMaf['Hugo_Symbol'].iloc[0]
    penta = alleleMaf['pentaChange'].iloc[0]
    nmut = len(alleleMaf)
    listOfDicts.append(dict(allele=allele, nmut=nmut, penta=penta, gene=gene))

# NOTE: this rebinds df, replacing the per-gene metrics table built earlier.
df = pd.DataFrame(listOfDicts)

In [265]:
# Mean recurrence (nmut) of the alleles sharing each pentanucleotide change.
pentaChangeAverages = dict(
    (penta, np.nanmean(df.loc[df['penta'] == penta, 'nmut']))
    for penta in set(df['penta']))


In [266]:
# Attach each allele's per-penta average recurrence (used as an ordering value)
# and flag whether its gene is on the tumor-suppressor list.
df['orderingVal'] = df['penta'].map(pentaChangeAverages)
df['tumorSup'] = df['gene'].isin(tumorSuppresors)

In [267]:
# Export the per-allele occurrence table as a tab-separated file.
# NOTE(review): absolute, machine-specific path that is not derived from pathPrefix.
df.to_csv('/Users/friedman/Desktop/WORK/poleSitOccurenceInfo.tsv', index=False, sep='\t')

In [275]:
# The 20 most recurrent alleles as (count, allele, penta) tuples; tuples sort
# lexicographically, so ties on count are ordered by allele and then penta, descending.
print sorted(zip(df['nmut'], df['allele'], df['penta']), reverse=True)[:20]

[(22, 'ARID1A_p.R1989*', 'TT(C>T)GA'), (18, 'TP53_p.R213*', 'TT(C>T)GA'), (17, 'NF1_p.R2450*', 'TT(C>T)GA'), (16, 'PIK3R1_p.R348*', 'TT(C>T)GA'), (14, 'ATM_p.R250*', 'TT(C>T)GA'), (11, 'CASP8_p.E239*', 'TT(C>A)TT'), (10, 'PTEN_p.E7*', 'CT(C>A)TT'), (7, 'SMAD4_p.E520*', 'TT(C>A)TT'), (7, 'FBXW7_p.R658*', 'TT(C>T)GA'), (7, 'ASXL2_p.R312*', 'AT(C>T)GA'), (7, 'APC_p.R2237*', 'TT(C>T)GA'), (7, 'APC_p.R1114*', 'AT(C>T)GA'), (6, 'TP63_p.R594*', 'TT(C>T)GA'), (6, 'RASA1_p.E429*', 'TT(C>A)TT'), (6, 'NF1_p.R440*', 'TT(C>T)GA'), (6, 'MSH2_p.E580*', 'TT(C>A)TT'), (6, 'MET_p.R1170*', 'TT(C>T)GA'), (6, 'EPHA7_p.R895*', 'TT(C>T)GA'), (6, 'ASXL2_p.E45*', 'TT(C>A)TT'), (6, 'APC_p.R2204*', 'TT(C>T)GA')]


**TMZ-hypermutated glioma**

In [6]:
# All cohort mutations from samples in the TMZ-signature case set.
# NOTE(review): casesWithTMZSignature is built from the dominant signature alone; no filter
# on cancer type is applied in the visible code, so "glioma" is not enforced here.
tmzMaf = allImpactMuts[allImpactMuts['Tumor_Sample_Barcode'].isin(casesWithTMZSignature)]

In [13]:
tmzMotifs = set(['ACTC', 'ACTT', 'CCTC', 'CCTT', 'GCTC', 'GCTT', 'TCTC', 'TCTT', 
                'ACTA', 'CCTA', 'GCTA', 'TCTA'])
tmzMutatedGliomaMaf = tmzMaf[tmzMaf['quadNuc'].isin(tmzMotifs)]
print Counter(tmzMutatedGliomaMaf['allele'])

#print tmzM

Counter({'TERT_nan': 51, 'MSH6_p.T1219I': 8, 'KRAS_p.G12D': 7, 'EGFR_p.A289V': 6, 'PIK3CA_p.E545K': 5, 'EGFR_p.R108K': 4, 'PIK3CA_p.E542K': 4, 'AKT1_p.E17K': 4, 'NF2_p.P135L': 3, 'IRF4_p.P392S': 3, 'PARK2_p.D115N': 3, 'MET_nan': 3, 'FGFR1_p.S437N': 3, 'BCL6_p.E503K': 3, 'BCOR_p.T870I': 3, 'ETV1_p.X185_splice': 3, 'SMAD2_p.D59N': 3, 'PBRM1_p.G1355E': 3, 'NOTCH4_p.P1531S': 3, 'CDKN2A_p.E88K': 3, 'CDKN2A_p.W110*': 3, 'STAG2_p.V426I': 3, 'IDH2_p.R43K': 3, 'NOTCH1_p.G1434D': 3, 'RBM10_p.D139N': 3, 'MSH6_p.T767I': 3, 'ALK_p.Q515*': 3, 'MAP3K13_p.S649F': 3, 'CIC_p.G1582E': 3, 'TP53_p.G244S': 3, 'PIK3CA_p.G118D': 3, 'ARID2_p.R401K': 3, 'NOTCH3_p.G1059E': 3, 'PTPN11_p.E69K': 3, 'BCL6_p.S300N': 3, 'SMAD4_p.A15V': 3, 'KDM5C_p.T937I': 3, 'STK11_p.V320M': 3, 'BRIP1_p.E157K': 3, 'ARID1B_p.P1352L': 3, 'MET_p.V1078M': 3, 'DICER1_p.G1809R': 3, 'BRD4_p.V196I': 3, 'PIK3CD_p.E607K': 3, 'RPS6KB2_p.D354N': 3, 'TSC2_p.R1044K': 2, 'PTPN11_p.G115E': 2, 'ASXL1_p.V1396M': 2, 'PDCD1_p.P160S': 2, 'EGFR_p.G601E': 2