In [1]:
import sys
import argparse
import os
import pandas as pd
import numpy as np
import re

# Extend the module search path before the imports below run; the four
# project modules are presumably resolved from this directory.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs unmodified on a machine with this exact directory layout.
sys.path.append('/Users/friedman/Desktop/hypermutationProjectFinal/scripts/utilityScripts')
import configuration_util
import analysis_utils
import mutationSigUtils
import maf_analysis_utils

# Lookup of project file/directory locations. Later cells index it with the
# keys 'IMPACT_BASE_MAF' and 'HYPERMUTATION_STATUS_IDS'.
filePathDict = configuration_util.get_all_files_path_dict()

In [2]:
def get_per_case_oncogenic_mut_info(muts):
    """Count, per tumor sample, the rows that carry a non-null 'oncogenic' value.

    Parameters
    ----------
    muts : pandas.DataFrame
        Must provide the columns 'oncogenic' and 'Tumor_Sample_Barcode'.

    Returns
    -------
    dict
        Maps each 'Tumor_Sample_Barcode' having at least one row with a
        non-null 'oncogenic' entry to the number of such rows. Barcodes
        without any such row do not appear in the dict.
    """
    isAnnotated = muts['oncogenic'].notnull()
    barcodeCounts = muts.loc[isAnnotated, 'Tumor_Sample_Barcode'].value_counts()
    return dict(barcodeCounts)

def get_per_case_hotspot_mut_info(muts):
    """Count, per tumor sample, the rows flagged as hotspots.

    Parameters
    ----------
    muts : pandas.DataFrame
        Must provide the columns 'is-a-hotspot' and 'Tumor_Sample_Barcode'.

    Returns
    -------
    dict
        Maps each 'Tumor_Sample_Barcode' having at least one row whose
        'is-a-hotspot' value equals 'Y' to the number of such rows. Barcodes
        without any such row do not appear in the dict.
    """
    isHotspot = muts['is-a-hotspot'].eq('Y')
    barcodeCounts = muts.loc[isHotspot, 'Tumor_Sample_Barcode'].value_counts()
    return dict(barcodeCounts)


In [3]:
# Load the tab-delimited MAF located via the 'IMPACT_BASE_MAF' path entry.
# read_csv with an explicit tab separator parses the file the same way
# read_table does, without the deprecation warning some pandas releases emit
# for read_table. low_memory=False makes pandas infer each column's dtype from
# the whole file instead of chunk by chunk, so columns do not end up holding
# mixed types (the situation pandas reports as a DtypeWarning).
allImpactMuts = pd.read_csv(filePathDict['IMPACT_BASE_MAF'], sep='\t', low_memory=False)

  """Entry point for launching an IPython kernel.
  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Per-sample mutation tallies over the full MAF: rows with a non-null
# 'oncogenic' value, and rows flagged 'Y' in 'is-a-hotspot'. Samples with a
# count of zero are simply absent from the respective dict.
perCaseOncogenicInfo, perCaseHotspotInfo = (
    get_per_case_oncogenic_mut_info(allImpactMuts),
    get_per_case_hotspot_mut_info(allImpactMuts),
)

In [5]:
hypermutantStatusDir = filePathDict['HYPERMUTATION_STATUS_IDS']

# Run one ID lookup per (status, cancer type) pair. The iteration order
# (all 'Normal' lookups first, then all 'Hypermutated') matches the order in
# which the individual cohort variables are bound below.
idsByStatusAndType = {
    (statusLabel, cancerLabel): analysis_utils.get_ids_by_hypermutant_status(
        hypermutantIdDir=hypermutantStatusDir,
        cancerType=cancerLabel,
        hypermutantStatus=statusLabel,
    )
    for statusLabel in ('Normal', 'Hypermutated')
    for cancerLabel in ('Endometrial Cancer', 'Colorectal Cancer', 'Glioma')
}

normalEndometrial = idsByStatusAndType[('Normal', 'Endometrial Cancer')]
normalColorectal = idsByStatusAndType[('Normal', 'Colorectal Cancer')]
normalGlioma = idsByStatusAndType[('Normal', 'Glioma')]

hyperEndometrial = idsByStatusAndType[('Hypermutated', 'Endometrial Cancer')]
hyperColorectal = idsByStatusAndType[('Hypermutated', 'Colorectal Cancer')]
hyperGlioma = idsByStatusAndType[('Hypermutated', 'Glioma')]


  df = pd.read_table(path)


In [6]:
# Cancer types that already have their own dedicated cohorts. They are left
# out of the "Other" cohorts so that a sample is not counted both under its
# own cancer type and under "Other".
namedCancerTypes = {'Endometrial Cancer', 'Colorectal Cancer', 'Glioma'}

otherNormal = set()
otherHypermutated = set()
for f in os.listdir(hypermutantStatusDir):
    # Ignore hidden entries (e.g. macOS '.DS_Store'); they are not ID files.
    if f.startswith('.'):
        continue
    # The cancer type is the file name without its extension, with
    # underscores turned into spaces. splitext drops the extension whatever
    # its length, instead of blindly cutting the last four characters.
    cType = re.sub('_', ' ', os.path.splitext(f)[0])
    if cType in namedCancerTypes:
        continue
    otherNormal |= analysis_utils.get_ids_by_hypermutant_status(
        hypermutantIdDir=hypermutantStatusDir, cancerType=cType, hypermutantStatus='Normal')
    otherHypermutated |= analysis_utils.get_ids_by_hypermutant_status(
        hypermutantIdDir=hypermutantStatusDir, cancerType=cType, hypermutantStatus='Hypermutated')


In [7]:
# Maps each cohort label to the collection of sample IDs in that cohort.
cohortsAndNames = {
    'normal_Endometrial': normalEndometrial,
    'normal_Colorectal': normalColorectal,
    'normal_Glioma': normalGlioma,
    'hyper_Endometrial': hyperEndometrial,
    'hyper_Colorectal': hyperColorectal,
    'hyper_Glioma': hyperGlioma,
    'normal_Other': otherNormal,
    'hyper_Other': otherHypermutated,
}

# One record per (cohort, sample) pair. A sample with no entry in a count
# dict gets None for that count (not 0).
listOfDicts = [
    {
        'Tumor_Sample_Barcode': sampleId,
        'nOncMuts': perCaseOncogenicInfo.get(sampleId),
        'nHotspots': perCaseHotspotInfo.get(sampleId),
        'cohort': cohortLabel,
    }
    for cohortLabel, sampleIds in cohortsAndNames.items()
    for sampleId in sampleIds
]

df = pd.DataFrame(listOfDicts)
            

In [8]:
# Rank of each cohort label, numbered from 1 in the order listed here.
orderingValDict = {
    cohortLabel: rank
    for rank, cohortLabel in enumerate(
        [
            'normal_Colorectal', 'hyper_Colorectal',
            'normal_Endometrial', 'hyper_Endometrial',
            'normal_Glioma', 'hyper_Glioma',
            'normal_Other', 'hyper_Other',
        ],
        1,
    )
}

# Look up the rank for every row; an unknown cohort label raises KeyError.
df['orderingVal'] = df['cohort'].apply(lambda cohortLabel: orderingValDict[cohortLabel])
# The cancer type is the part of the cohort label after the first underscore
# (labels have the form '<status>_<type>').
df['cancerType'] = df['cohort'].apply(lambda cohortLabel: cohortLabel.split('_')[1])

In [9]:
# Destination directory for the plotting input file.
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
writeDir = '/Users/friedman/Desktop/hypermutationProjectFinal/scripts/figure1/FIGURE1_PLOTTING_FILES/'
outputPath = os.path.join(writeDir, 'figure1c_nOncMutByCohort.tsv')
# Tab-separated output without the DataFrame index column.
df.to_csv(outputPath, sep='\t', index=False)

In [20]:
#hypermutantsOnlyMaf.to_csv(pathPrefix + '/juno/work/taylorlab/friedman/myAdjustedDataFiles/data_mutations_extended_annotated_nov19_hypermutantOnly_2019.maf', index=False, sep='\t')
