###### Calculate TMB scores from MAF files 

- This script takes in an MAF file that is in BED file format (without the first three columns)
- The MAF file should contain all samples
- It also takes in a manifest/histology file and an output file name
- It looks for variants that show `Missense_Mutation` or `Nonsense_Mutation` in the
    `Variant_Classification` column and calculates a TMB score per sample
- Each sample is matched up with its disease type from the histology file, and the
    output is written with the sample name, TMB score and disease type on each line
    


In [None]:
import pandas as pd
import numpy as np

# Divisor used to scale a raw variant count to a per-million rate.
# NOTE(review): presumably the size in base pairs of the region covered by
# "temp.withintarget.bed" -- confirm against how that BED file was produced.
TARGET_REGION_SIZE = 77462866

# Variant_Classification values that count towards the TMB score.
TMB_VARIANT_CLASSES = ("Missense_Mutation", "Nonsense_Mutation")


def calculate_tmb(maf, region_size=TARGET_REGION_SIZE):
    """Return one row per sample with its TMB score.

    Args:
        maf: DataFrame with at least the columns ``Tumor_Sample_Barcode``
            and ``Variant_Classification``, one row per variant.
        region_size: Divisor applied after scaling the count by 1,000,000.

    Returns:
        DataFrame with columns ``Samplenames`` and ``TMB``, sorted by sample
        name. A sample whose variants are all of other classes is still
        listed, with a TMB of 0. Rows whose barcode is missing (NaN) are
        not counted, because ``groupby`` drops NaN keys by default.
    """
    # Flag the counted variants once for the whole table, then sum the flags
    # per sample. This replaces the per-sample DataFrame.append loop, which
    # no longer exists in pandas 2.0 and copied the result frame every time.
    is_counted = maf["Variant_Classification"].isin(TMB_VARIANT_CLASSES)
    counts = is_counted.groupby(maf["Tumor_Sample_Barcode"]).sum()

    return pd.DataFrame(
        {
            "Samplenames": counts.index.to_numpy(),
            # Kept numeric rather than converted to str.
            "TMB": counts.to_numpy() * 1000000 / region_size,
        }
    )


# True both when this cell runs in a notebook kernel and when the file is run
# as a script; the guard only keeps the file I/O from running on import.
if __name__ == "__main__":
    maf_file = pd.read_csv("inputs/temp.withintarget.bed", sep="\t", index_col=False)
    # read_csv parses empty fields and strings such as "NA" as missing values;
    # filling them with the literal "NA" keeps variants that have no sample
    # barcode grouped under a sample named "NA" instead of being dropped.
    maf_file = maf_file.fillna("NA")

    manifest = pd.read_csv("inputs/pbta-histologies.tsv", sep="\t", index_col=False)

    sample_tmb_df = calculate_tmb(maf_file)

    # Left join on the biospecimen ID: samples absent from the manifest are
    # kept and get a missing short_histology.
    final_tmb = sample_tmb_df.join(
        manifest.set_index("Kids_First_Biospecimen_ID"), on="Samplenames"
    )[["Samplenames", "TMB", "short_histology"]]
    final_tmb.to_csv("outputs/test_tmbscores.txt", index=False)