## Setup notebook

In [257]:
import pandas as pd
import numpy as np
import math
#from scipy import stats

# plotting
#import matplotlib as mpl

#import pickle

import re
#from collections import Counter
from collections import defaultdict

import yaml
import builtins

# jupyter notebook amenities
#%matplotlib inline
%autosave 10

Autosaving every 10 seconds


## Read in data

### Drug and Biomarkers

In [270]:
oncokb_yaml_file = '../OncoKB-Drug-Information/oncokb.yaml'
oncokb_text_file = '../OncoKB-Drug-Information/allActionableVariants.txt'

#with open(oncokb_yaml_file, 'r') as f:
#    oncokb = yaml.load(f)

oncokb_table = pd.read_csv(oncokb_text_file, sep='\t', header=0, index_col=None)


#oncokb = defaultdict(list)
#for i, row in oncokb_table.iterrows():
#    # Save desired values for a given alteration
#    v = {'Gene': row.Gene,
#         'Alteration': row.Alteration,
#         'Subtype': row['Cancer Type'],
#         'Level': row.Level}
#    # Save for each drug listed for an alteration
#    drugs = row['Drugs(s)'].split(', ')
#    for drug in drugs:
#        oncokb[drug].append(v)

### Mutation data

In [16]:
tcga_mutation_filename = '../../../../TCGA_Pancanatlas_SNVs_out.tsv'
tcga_mutation_data = pd.read_csv(tcga_mutation_filename, sep='\t', header=0, index_col=None)

print(tcga_mutation_data.shape)
print(list(tcga_mutation_data))

  interactivity=interactivity, compiler=compiler, result=result)


### CNV data

In [18]:
tcga_CNV_filename = '../../../../TCGA_Pancanatlas_CNV_Thresholded_out.tsv'
tcga_cnv_data = pd.read_csv(tcga_CNV_filename, sep='\t', header=0, index_col=0)

print(tcga_cnv_data.shape)

(25128, 10715)


### Fusion data

#### tumorfusions.org

In [248]:
tumorfusions_filename = "../../../../fusion_data/pancanfus_newline.txt"
tcga_fusion_data = pd.read_csv(tumorfusions_filename, sep='\t', header=0)

tcga_fusion_data["sampleId"] = ["TCGA-"+x[0:2]+"-"+x[3:7]+"-"+x[8:] for x in tcga_fusion_data["sampleId"]]

print(tcga_fusion_data.shape)
print(len(set(tcga_fusion_data["sampleId"])))
samples_in_fusion_data = [x[:15] for x in tcga_fusion_data["sampleId"]]
print(len(set(samples_in_fusion_data)))

(17754, 27)
5517
5499


### Clinical data

In [21]:
tcga_clinical_filename = '../../../../TCGA_Pancanatlas_Clinical_out.tsv'
tcga_clinical_data = pd.read_csv(tcga_clinical_filename, sep='\t', header=0, index_col=0,encoding ='latin1')

print(list(tcga_clinical_data))

['bcr_patient_barcode', 'acronym', 'gender', 'vital_status', 'days_to_birth', 'days_to_death', 'days_to_last_followup', 'days_to_initial_pathologic_diagnosis', 'age_at_initial_pathologic_diagnosis', 'icd_10', 'tissue_retrospective_collection_indicator', 'icd_o_3_histology', 'tissue_prospective_collection_indicator', 'history_of_neoadjuvant_treatment', 'icd_o_3_site', 'tumor_tissue_site', 'new_tumor_event_after_initial_treatment', 'radiation_therapy', 'race', 'project_code', 'prior_dx', 'disease_code', 'ethnicity', 'informed_consent_verified', 'person_neoplasm_cancer_status', 'patient_id', 'year_of_initial_pathologic_diagnosis', 'histological_type', 'tissue_source_site', 'form_completion_date', 'pathologic_T', 'pathologic_M', 'clinical_M', 'pathologic_N', 'system_version', 'pathologic_stage', 'stage_other', 'clinical_stage', 'clinical_T', 'clinical_N', 'extranodal_involvement', 'postoperative_rx_tx', 'primary_therapy_outcome_success', 'lymph_node_examined_count', 'primary_lymph_node_pre

  interactivity=interactivity, compiler=compiler, result=result)


### TCGA ID mapping

In [22]:
tcga_id_mapping_filename = '../../../../TCGA_id_mapping.tsv'
tcga_id_map = pd.DataFrame.from_csv(tcga_id_mapping_filename, sep='\t', header=0)
print(tcga_id_map.shape)

(12873, 8)


### Samples in the different data types

In [250]:
# Mutation data
samples_in_mutation_data = list(set(tcga_id_map.loc[tcga_id_map["SNVs_id"].isin(set(tcga_mutation_data["Tumor_Sample_Barcode"]))
                                          ,"sample_id"]))
print(len(samples_in_mutation_data))


# CNV data
samples_in_cnv_data = list(set(tcga_id_map.loc[tcga_id_map["CNV_id"].isin(list(tcga_cnv_data)[2:])
                                          ,"sample_id"]))
print(len(samples_in_cnv_data))

# Fusion data
samples_in_fusion_data = list(set([x for x in samples_in_fusion_data if x in list(tcga_id_map["sample_id"])]))
print(len(samples_in_fusion_data))
# use sample IDs from expression data as a background list (PRADA is working on RNA-Seq data)
samples_in_expression_data = list(set([str(i)[:15] for i in tcga_id_map["mRNA_id"]]))
print(len(samples_in_expression_data))
samples_in_fusion_data = list(set(samples_in_fusion_data + samples_in_expression_data))
print(len(samples_in_fusion_data))


# Clinical data
samples_in_clinical_data = list(set(tcga_id_map.loc[tcga_id_map["Clinical_id"].isin(set(tcga_clinical_data["bcr_patient_barcode"]))
                                          ,"sample_id"]))
print(len(samples_in_clinical_data))


# All sample IDs
set_of_tcga_samples = set(tcga_id_map["sample_id"])
set_of_tcga_samples = [x for x in set_of_tcga_samples if x == x] #excludes 'nan' from list
print(len(set_of_tcga_samples))

10510
10713
5498
11061
11117
12384
12747


## Functions defining binary biomarker labels

In [214]:
# Define what is a nonsynonymous mutation
print(Counter(tcga_mutation_data["Variant_Classification"]))

non_synonymous_mutations = {'Translation_Start_Site','Missense_Mutation','Nonsense_Mutation','Nonstop_Mutation'
                            ,'In_Frame_Ins','In_Frame_Del','Frame_Shift_Ins','Frame_Shift_Del'
                            , 'Splice_Site'}

indel_mutations = {'In_Frame_Ins','In_Frame_Del','Frame_Shift_Ins','Frame_Shift_Del'}
insertion_mutations = {'In_Frame_Ins','Frame_Shift_Ins'}
deletion_mutations = {'In_Frame_Del','Frame_Shift_Del'}

truncating_mutations = {'Nonsense_Mutation','Frame_Shift_Ins','Frame_Shift_Del'}

Counter({'Intron': 11040579, 'Missense_Mutation': 2888649, 'Frame_Shift_Del': 1622149, 'IGR': 1271910, 'Silent': 1238725, 'RNA': 1034254, "3'UTR": 800627, "3'Flank": 752922, "5'Flank": 712387, 'Frame_Shift_Ins': 404988, "5'UTR": 246952, 'Nonsense_Mutation': 237134, 'Splice_Site': 133921, 'In_Frame_Del': 59425, 'In_Frame_Ins': 33462, 'Translation_Start_Site': 4310, 'Nonstop_Mutation': 3176, 'Targeted_Region': 57})


In [251]:
def get_nonsyn_mutations(gene_name):
    """
    get all nonsynonymous mutations for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not mutated, 1=at least one non-synonymous mutation)
    """
    mutation_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        mutation_dict[sample] = 0
    
    for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == gene_name)
                                & (tcga_mutation_data['Variant_Classification'].isin(non_synonymous_mutations))
                                ,"Tumor_Sample_Barcode"]:
        if k[:15] in mutation_dict:
            mutation_dict[k[:15]] = 1

    return mutation_dict

def get_truncating_mutations(gene_name):
    """
    get all nonsynonymous mutations for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not mutated, 1=at least one non-synonymous mutation)
    """
    mutation_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        mutation_dict[sample] = 0
    
    for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == gene_name)
                                & (tcga_mutation_data['Variant_Classification'].isin(truncating_mutations))
                                ,"Tumor_Sample_Barcode"]:
        mutation_dict[k[:15]] = 1

    return mutation_dict

def get_nonsyn_mutations_in_exons(gene_name, exon_list):
    """
    get all nonsynonymous mutations for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not mutated, 1=at least one non-synonymous mutation)
    """
    mutation_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        mutation_dict[sample] = 0

    for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == "KIT")
                                & (tcga_mutation_data['Variant_Classification'].isin(non_synonymous_mutations))
                                & ([x.startswith(tuple(exon_list)) for x in tcga_mutation_data['Exon_Number']])
                                ,"Tumor_Sample_Barcode"]:
        mutation_dict[k[:15]] = 1

    return mutation_dict

def get_indel_mutations_in_exons(gene_name, exon_list, keyword='indel'):
    """
    get all nonsynonymous mutations for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not mutated, 1=at least one non-synonymous mutation)
    """
    mutation_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        mutation_dict[sample] = 0

    if keyword == 'indel':
        for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == "KIT")
                                & (tcga_mutation_data['Variant_Classification'].isin(indel_mutations))
                                & ([x.startswith(tuple(exon_list)) for x in tcga_mutation_data['Exon_Number']])
                                ,"Tumor_Sample_Barcode"]:
            mutation_dict[k[:15]] = 1
    elif keyword == 'insertion':
        for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == "KIT")
                                & (tcga_mutation_data['Variant_Classification'].isin(insertion_mutations))
                                & ([x.startswith(tuple(exon_list)) for x in tcga_mutation_data['Exon_Number']])
                                ,"Tumor_Sample_Barcode"]:
            mutation_dict[k[:15]] = 1

    elif keyword == 'deletion':
        for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == "KIT")
                                & (tcga_mutation_data['Variant_Classification'].isin(deletion_mutations))
                                & ([x.startswith(tuple(exon_list)) for x in tcga_mutation_data['Exon_Number']])
                                ,"Tumor_Sample_Barcode"]:
            mutation_dict[k[:15]] = 1
            
    else:
        print("no valid keyword provided: has to be one of [indel,insertion,deletion]")

    return mutation_dict

def get_any_mutations(gene_name):
    """
    get all mutations for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not mutated, 1=at least one mutation)
    """
    mutation_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        mutation_dict[sample] = 0

    for k in tcga_mutation_data.loc[(tcga_mutation_data['Hugo_Symbol'] == gene_name)
                                ,"Tumor_Sample_Barcode"]:
        mutation_dict[k[:15]] = 1
        
    return mutation_dict

def get_point_mutations(gene_name,list_of_AA_changes):
    
    point_mut_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_mutation_data:
        point_mut_dict[sample] = 0
    
    list_of_AA_changes = ["p."+change for change in list_of_AA_changes]
    for k in tcga_mutation_data.loc[(tcga_mutation_data["Hugo_Symbol"]==gene_name)
                       &(tcga_mutation_data["HGVSp_Short"].isin(list_of_AA_changes))
                       ,"Tumor_Sample_Barcode"]:
        point_mut_dict[k[:15]] = 1
    
    return point_mut_dict

def get_deletions(gene_name):
    """
    get all deletions (-2 in the thresholded data) for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not deleted, 1= deleted (-2))
    """
    
    cnv_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_cnv_data:
        cnv_dict[sample] = 0
    
    for k in tcga_cnv_data.columns[tcga_cnv_data.loc[gene_name,] == -2]:
        cnv_dict[k[:15]] = 1
    
    return cnv_dict

def get_deletions_1(gene_name):
    """
    get all deletions (-1 in the thresholded data) for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not deleted, 1= deleted (-1))
    """
    cnv_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_cnv_data:
        cnv_dict[sample] = 0
        
    for k in tcga_cnv_data.columns[tcga_cnv_data.loc[gene_name,] == -1]:
        cnv_dict[k[:15]] = 1 
    
    return cnv_dict

def get_amplifications(gene_name):
    """
    get all amplifications (2 in the thresholded data) for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not amplified, 1= amplified (2))
    """
    cnv_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_cnv_data:
        cnv_dict[sample] = 0
        
    for k in tcga_cnv_data.columns[tcga_cnv_data.loc[gene_name,] == 2]:
        cnv_dict[k[:15]] = 1  

    return cnv_dict

def get_amplifications_1(gene_name):
    """
    get all amplifications (1 in the thresholded data) for a specific gene from the TCGA data
    returns a dictionary with sample_name:0/1 (0=not amplified, 1= amplified (1))
    """
    
    cnv_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_cnv_data:
        cnv_dict[sample] = 0
    
    for k in tcga_cnv_data.columns[tcga_cnv_data.loc[gene_name,] == 1]:
        cnv_dict[k[:15]] = 1   

    return cnv_dict

def get_fusion_genes(gene1, gene2):
    
    fusion_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_fusion_data:
        fusion_dict[sample] = 0
    
    for k in tcga_fusion_data.loc[(tcga_fusion_data['Gene_A'] == gene1)
                                & (tcga_fusion_data['Gene_B'] == gene2)
                                ,"sampleId"]:
        if k[:15] in fusion_dict:
            fusion_dict[k[:15]] = 1
        
    for k in tcga_fusion_data.loc[(tcga_fusion_data['Gene_A'] == gene2)
                                & (tcga_fusion_data['Gene_B'] == gene1)
                                ,"sampleId"]:
        if k[:15] in fusion_dict:
            fusion_dict[k[:15]] = 1
    
    return fusion_dict

def get_fusion_gene(gene):
    
    fusion_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    for sample in samples_in_fusion_data:
        fusion_dict[sample] = 0
    
    for k in tcga_fusion_data.loc[(tcga_fusion_data['Gene_A'] == gene)
                                ,"sampleId"]:
        if k[:15] in fusion_dict:
            fusion_dict[k[:15]] = 1
    
    for k in tcga_fusion_data.loc[(tcga_fusion_data['Gene_B'] == gene)
                                ,"sampleId"]:
        if k[:15] in fusion_dict:
            fusion_dict[k[:15]] = 1
    
    return fusion_dict

def get_biallelic_inactivation(gene_name):

    nonsyn_mutation_and_deletion = intersect_attribute([get_deletions_1(gene_name),get_nonsyn_mutations(gene_name)])
    biallelic_inactivation = union_attribute([get_deletions(gene_name),nonsyn_mutation_and_deletion])
    
    return biallelic_inactivation

def union_attribute(list_of_attribute_dicts):
    # union of keys
    keys = list()
    for attribute in list_of_attribute_dicts:
        keys.extend(attribute.keys())
        keys = list(set(keys))

    # Fill in 0s for non-missing data
    union_dict = dict([ (sample_name, None) for sample_name in keys ])
    for k in keys:
        for attribute in list_of_attribute_dicts:
            if k in attribute:
                if attribute[k] == 0:
                    union_dict[k] = 0
    
    # Overwrite with 1s (build union)
    for k in keys:
        for attribute in list_of_attribute_dicts:
            if k in attribute:
                if attribute[k] == 1:
                    union_dict[k] = 1
               
    return union_dict

def intersect_attribute(list_of_attribute_dicts):
    # union of keys
    keys = list()
    for attribute in list_of_attribute_dicts:
        keys.extend(attribute.keys())
        keys = list(set(keys))
    
    intersect_dict = dict([ (sample_name, None) for sample_name in keys ])
    
    # Fill in 0s for non-missing data
    for k in keys:
        in_all_dicts = True
        for attribute in list_of_attribute_dicts:
            if k in attribute:
                if attribute[k] == None:
                    in_all_dicts = False
        if in_all_dicts:
            intersect_dict[k] = 0
            
    # Overwrite with 1s (intersection)
    for k in keys:
        in_all_dicts = True
        for attribute in list_of_attribute_dicts:
            if k in attribute:
                if attribute[k] == 0:
                    in_all_dicts = False
                if attribute[k] == None:
                    in_all_dicts = False
        if in_all_dicts:
            intersect_dict[k] = 1
                    
    return intersect_dict

def reverse_attribute(attribute):
    reverse_dict = dict([ (sample_name, None) for sample_name in attribute.keys() ])
    for k in attribute.keys():
        if attribute[k]==0:
            reverse_dict[k] = 1
        if attribute[k]==1:
            reverse_dict[k] = 0
    return reverse_dict
    

def how_many_positive_samples(attribute_dict):
    return sum(1 for v in attribute_dict.values() if v==1)

def how_many_negative_samples(attribute_dict):
    return sum(1 for v in attribute_dict.values() if v==0)

## Create biomarker labels

In [272]:
# create empty columns for TCGA samples
print(oncokb_table.shape)
for sample in set_of_tcga_samples:
    oncokb_table[sample] = np.nan

print(oncokb_table.shape)
print(len(set_of_tcga_samples))



(264, 10)
(264, 12757)
12747


In [276]:
# fill the oncokb table with biomarker labels
for index, row in oncokb_table.iterrows():
    
    feature_vec = 0
    
    # Fusion biomarker
    if "Fusion" in row["Alteration"]:  
        if row["Alteration"]=="Fusions":
            feature_vec = get_fusion_gene(row["Gene"])
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))
        else:
            fusion_genes = row["Alteration"].split(' ')[0].split('-')
            feature_vec = get_fusion_genes(fusion_genes[0],fusion_genes[1])
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))
            
            
    # Copy number biomarker - TODO: No deletions?
    elif "Amplification" in row["Alteration"]:
        feature_vec = get_amplifications(row["Gene"])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))
    elif "Deletion" in row["Alteration"]:
        print(row[:7])
        
        
    # Point mutations
    elif re.fullmatch(r'[A-Z][0-9]{1,5}[A-Z]', row["Alteration"], flags=0):
        feature_vec = get_point_mutations(row["Gene"],[row["Alteration"]])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))

        
    # Exon specific mutations
    elif "Exon" in row["Alteration"]:
        exon = row["Alteration"].split(' ')[1]
        if "mutations" in row["Alteration"]:
            feature_vec = get_nonsyn_mutations_in_exons(row["Gene"],[exon])
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))
                
        elif "insertion" in row["Alteration"] and "deletion" in row["Alteration"]:
            feature_vec = get_indel_mutations_in_exons(row["Gene"],[exon],'indel')
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))
                
        elif "insertion" in row["Alteration"] and not "deletion" in row["Alteration"]:
            feature_vec = get_indel_mutations_in_exons(row["Gene"],[exon],'insertion')
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))
                
        elif "deletion" in row["Alteration"] and not "insertion" in row["Alteration"]:
            feature_vec = get_indel_mutations_in_exons(row["Gene"],[exon],'deletion')
            #print(row[:7])
            #print(how_many_positive_samples(feature_vec))

    # Nonsense and Frame-shift mutations
    elif "Truncating" in row["Alteration"]:
        feature_vec = get_truncating_mutations(row["Gene"])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))

        
    elif "splice" in row["Alteration"]:
        feature_vec = get_point_mutations(row["Gene"],[row["Alteration"]])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))

        
    elif "Wildtype" in row["Alteration"]:
        feature_vec = reverse_attribute(get_nonsyn_mutations(row["Gene"]))
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))

        
    # special EGFR mutations        
    elif "A763_Y764insFQEA" in row["Alteration"]:
        feature_vec = get_point_mutations(row["Gene"],[row["Alteration"]])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))
    elif "E709_T710delinsD" in row["Alteration"]:
        feature_vec = get_point_mutations(row["Gene"],[row["Alteration"]])
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))
 

    # TODO: check papers for mutations
    elif "Oncogenic Mutations" in row["Alteration"]:
        print(row[:7])
        
        
    # only very few annotations - TODO: define on mutation data!
    elif "Microsatellite Instability-High" in row["Alteration"]:
        instability_dict = dict([ (sample_name, None) for sample_name in set_of_tcga_samples ])
    
        for k in tcga_clinical_data.loc[(tcga_clinical_data['microsatellite_instability'] == "YES")
                                ,"bcr_patient_barcode"]:
            matching_sample_ids = [str(x) for x in set_of_tcga_samples if k in str(x)]
            for sid in matching_sample_ids:
                instability_dict[sid] = 1
                    
        for k in tcga_clinical_data.loc[(tcga_clinical_data['microsatellite_instability'] == "NO")
                                ,"bcr_patient_barcode"]:
            matching_sample_ids = [str(x) for x in set_of_tcga_samples if k in str(x)]
            for sid in matching_sample_ids:
                instability_dict[sid] = 0
            
        feature_vec = instability_dict
        #print(row[:7])
        #print(how_many_positive_samples(feature_vec))
        
        
    else:
        print(row[:7])
            
            
            
        
    if(feature_vec):
        for sample in set_of_tcga_samples:
            oncokb_table.iloc[index,list(oncokb_table).index(sample)] = feature_vec[sample]
            #if feature_vec[sample] == 1:
            #    print(sample)
        

Isoform                         ENST00000320356
RefSeq                              NM_004456.4
Entrez Gene ID                             2146
Gene                                       EZH2
Alteration                  Oncogenic Mutations
Cancer Type       Diffuse Large B-Cell Lymphoma
Level                                         4
Name: 2, dtype: object
Isoform               ENST00000298552
RefSeq                    NM_000368.4
Entrez Gene ID                   7248
Gene                             TSC1
Alteration        Oncogenic Mutations
Cancer Type                CNS Cancer
Level                              2A
Name: 10, dtype: object
Isoform                ENST00000298552
RefSeq                     NM_000368.4
Entrez Gene ID                    7248
Gene                              TSC1
Alteration         Oncogenic Mutations
Cancer Type       Renal Cell Carcinoma
Level                               2A
Name: 11, dtype: object
Isoform               ENST00000288135
RefSeq          

Isoform                             ENST00000241453
RefSeq                                  NM_004119.2
Entrez Gene ID                                 2322
Gene                                           FLT3
Alteration        FLT3 internal tandem duplications
Cancer Type                  Acute Myeloid Leukemia
Level                                            3A
Name: 242, dtype: object
Isoform               ENST00000371953
RefSeq                    NM_000314.4
Entrez Gene ID                   5728
Gene                             PTEN
Alteration        Oncogenic Mutations
Cancer Type             Breast Cancer
Level                               4
Name: 244, dtype: object
Isoform               ENST00000371953
RefSeq                    NM_000314.4
Entrez Gene ID                   5728
Gene                             PTEN
Alteration        Oncogenic Mutations
Cancer Type                All Tumors
Level                               4
Name: 245, dtype: object
Isoform               ENST000

In [284]:
# write oncokb table to file
oncokb_table.to_csv("./allActionableVariants_TCGAsamples.tsv", sep='\t', index=False, na_rep = "NaN")