In [1]:
import pandas as pd
import numpy as np
from matplotlib_venn import venn2
from matplotlib import pyplot as plt
import seaborn as sns
import scipy.stats as stats
import os
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import seaborn as sns; sns.set()


%matplotlib inline

# Goal: http://www.nature.com/ng/journal/v48/n5/fig_tab/ng.3547_F1.html
#http://seaborn.pydata.org/generated/seaborn.clustermap.html

### Mutation definitions

In [2]:
# Variant-effect category definitions live in the local module var_def.py;
# the commented shell line below copies that file next to the notebook.
#! cp /cellar/users/mdow/Projects/HCC/DNA_analysis/Mouse_DNA/var_def.py .
import var_def

# Re-export each category under a short notebook-level name.
(syn, missense, splice, nonsense,
 frameshift, inframeIndel, silentRegions, others) = (
    var_def.syn, var_def.missense, var_def.splice, var_def.nonsense,
    var_def.frameshift, var_def.inframeIndel, var_def.silentRegions, var_def.others,
)

### Gene definitions

In [10]:
# Gene lists (original note: "Take out non-recurrent synonymous genes").
# The names suggest one list per source cohort (TCGA / ICGC) -- TODO confirm.
tcga_genes = [
    'TP53', 'CTNNB1', 'ALB', 'AXIN1', 'BAP1', 'KEAP1', 'NFE2L2',
    'LZTR1', 'RB1', 'PIK3CA', 'RPS6KA3', 'AZIN1', 'KRAS', 'IL6ST',
    'RP1L1', 'CDKN2A', 'EEF1A1', 'ARID2', 'ARID1A', 'GPATCH4', 'ACVR2A',
    'APOB', 'CREB3L3', 'NRAS', 'AHCTF1', 'HIST1H1C', 'SF3B1', 'SMARCA4',
]

icgc_genes = [
    'TP53', 'LRP1B', 'KMT2C', 'ARID1A', 'AXIN1', 'ARID2', 'TSC2',
    'KMT2A', 'KMT2D', 'SETD2', 'CREBBP', 'RB1', 'HNF1A', 'APC',
    'ATRX', 'EP300', 'TSC1', 'NOTCH2', 'NSD1', 'PTCH1',
]

# Plain concatenation: symbols present in both lists (e.g. 'TP53') occur twice.
hcc_genes = list(tcga_genes)
hcc_genes.extend(icgc_genes)

In [14]:
# Pathway gene collections are provided by the local pathway_genes module.
import pathway_genes

pGenes, pGenes_more = pathway_genes.pGenes, pathway_genes.pGenes_more

# Rebinds tcga_genes; compared with the earlier definition in this notebook
# the list gains 'TERT' and 'BRAF' at the end.
# NOTE(review): lists already derived from the earlier binding (e.g. hcc_genes)
# are not refreshed by this cell -- confirm that is intended.
tcga_genes = [
    'TP53', 'CTNNB1', 'ALB', 'AXIN1', 'BAP1', 'KEAP1', 'NFE2L2',
    'LZTR1', 'RB1', 'PIK3CA', 'RPS6KA3', 'AZIN1', 'KRAS', 'IL6ST',
    'RP1L1', 'CDKN2A', 'EEF1A1', 'ARID2', 'ARID1A', 'GPATCH4', 'ACVR2A',
    'APOB', 'CREB3L3', 'NRAS', 'AHCTF1', 'HIST1H1C', 'SF3B1', 'SMARCA4',
    'TERT', 'BRAF',
]

# Input location of the annotated variant table (hard-coded absolute path).
inDir = '/cellar/users/mdow/Projects/HCC/DNA_analysis/Mouse_DNA/'
fname = 'annotated_variants_ortGenes_humanGNames.csv'

In [20]:
# Cosmic cancer genes
# Check cosmic genes: load the table of cancer genes information (HGNC).
cosmicDir = '/cellar/users/mdow/Data/COSMIC/grch37/'
cosmicHGNC = pd.read_csv(cosmicDir+'CosmicHGNC.tsv',sep='\t')

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(cosmicHGNC.shape)

# Positional rename: the file must have exactly these seven columns, in this
# order, otherwise pandas raises on the assignment.
cosmicHGNC.columns = [u'COSMIC_ID', u'COSMIC_GENE_NAME', u'Entrez_id', u'HGNC_ID',
       u'mutated', u'Cancer_census', u'Expert_Curated']

print(cosmicHGNC.Cancer_census.value_counts())


(561, 7)
y    559
n      2
Name: Cancer_census, dtype: int64


In [21]:
# Read the census CSV (file name carries the date stamp 01172018) from a
# hard-coded absolute path; presumably the COSMIC Cancer Gene Census -- TODO confirm.
new_consensus = pd.read_csv('/cellar/users/mdow/Data/COSMIC/Census_all_01172018.csv')

## Set-up plotting variables

In [9]:
# Mouse model order
model_order = ['TAK1', 'MUP', 'STAM', 'DEN']

# Model colours: one "Set1" colour per model, repeated 9, 9, 19 and 19 times
# (the same per-model counts that sort_comut hard-codes for its sample rows).
colors = sns.color_palette("Set1", 4)
modelColors = tuple(
    colors[model_idx]
    for model_idx, n_repeats in enumerate((9, 9, 19, 19))
    for _ in range(n_repeats)
)

# coMut plot colours, picked from several seaborn palettes.
colorLs = sns.color_palette("colorblind", 8)
brown = sns.color_palette("BrBG", 10)[0]
purple = sns.color_palette("PRGn", 8)[0]
#blue=sns.color_palette("deep", 10)[0]
blue = sns.color_palette("colorblind", 10)[0]
green = sns.color_palette("Paired", 10)[3]
orange, yellow = colorLs[2], colorLs[4]

# Discrete nine-entry colormap used by the heatmaps; "#f2f2f2" appears twice.
myCmap = mpl.colors.ListedColormap([
    brown,
    "#f2f2f2",
    yellow,
    orange,
    "#f2f2f2",
    green,
    purple,
    'r',
    blue,
])

## P-value

### Processing functions

In [6]:
def get_dfScore_new(thisDf):
    """Add a numeric ``Score`` column derived from each row's ``Consequence``.

    Input cols : MODEL, SAMPLE, SYMBOL, Consequence
    Output cols: MODEL, SAMPLE, SYMBOL, Consequence, Score

    Each Consequence is tested against the notebook-level category
    collections in a fixed order and the first match wins:
    missense 4, nonsense 3, splice 2, syn 1, frameshift -1,
    inframeIndel -2, silentRegions 0, others -4.

    A Consequence that matches no category is printed and scored NaN.
    (Previously nothing was appended for it, so the score list ended up
    shorter than the frame and the column assignment raised.)

    The ``Score`` column is written onto ``thisDf`` itself and the same
    object is returned.
    """
    # Built at call time so the current notebook-level definitions are used.
    score_rules = [
        (missense, 4),
        (nonsense, 3),
        (splice, 2),
        (syn, 1),
        (frameshift, -1),
        (inframeIndel, -2),
        (silentRegions, 0),
        (others, -4),
    ]

    cq_lst = list(thisDf.Consequence)
    print(thisDf.shape)
    print(len(cq_lst))

    score_list = []
    for s in cq_lst:
        for members, score in score_rules:
            if s in members:
                score_list.append(score)
                break
        else:
            # Unknown consequence: report it and keep the row aligned.
            print(s)
            score_list.append(np.nan)

    print(len(score_list))

    thisDf['Score'] = score_list

    return thisDf

### Human Plotting Functions

In [2]:
def subset_dfs_human(myGenes, geneCnt_human, geneCnt_mouse, human_mutScore):
    """Restrict the count tables and the score table to the genes in ``myGenes``.

    A gene matches when its all-lowercase or all-uppercase spelling equals
    a ``SYMBOL`` value (count tables) or a column label (score table).
    Mixed-case labels such as 'Trp53' therefore do not match.

    Parameters
    ----------
    myGenes : iterable of str
    geneCnt_human, geneCnt_mouse : DataFrame with a ``SYMBOL`` column
    human_mutScore : DataFrame whose column labels are gene symbols

    Returns
    -------
    (geneCnt_h_sub, geneCnt_m_sub, df_sub)
        Independent copies, so later re-indexing or added columns never
        touch the inputs.
    """
    wanted = [g.lower() for g in myGenes] + [g.upper() for g in myGenes]

    # Gene alteration sample count tables
    geneCnt_h_sub = geneCnt_human[geneCnt_human.SYMBOL.isin(wanted)].copy()
    geneCnt_m_sub = geneCnt_mouse[geneCnt_mouse.SYMBOL.isin(wanted)].copy()
    print('Sub count tables in human and mouse: %s %s'
          % (geneCnt_h_sub.shape, geneCnt_m_sub.shape))

    # Main score table: keep only the requested gene columns
    df_sub = human_mutScore.loc[:, human_mutScore.columns.isin(wanted)].copy()
    print('Sub main table: %s' % (df_sub.shape,))

    return geneCnt_h_sub, geneCnt_m_sub, df_sub

def sort_dfs_human(geneCnt_human, geneCnt_mouse,df_sub,species):
    """Put both gene-count tables and the score table into one gene order.

    The order comes from ``get_geneOrder`` applied to the human table when
    ``species == 'human'`` and to the mouse table for any other value.

    Parameters
    ----------
    geneCnt_human, geneCnt_mouse : DataFrame with ``SYMBOL`` and ``Count``
    df_sub : DataFrame whose column labels are gene symbols
    species : str

    Returns
    -------
    (geneCnt_h_srt, geneCnt_m_srt, human_sub_srt)
        The count tables are indexed by SYMBOL, follow the gene order and
        carry ``Count_r`` (= -Count). ``human_sub_srt`` has its columns in
        the gene order.

    Notes
    -----
    * The inputs are left untouched (the original overwrote their index).
    * ``reindex`` is used instead of ``.loc[list]``: a gene present in only
      one table yields a NaN row/column rather than the KeyError current
      pandas raises for missing labels. SYMBOL values are assumed unique.
    """
    if species == 'human':
        geneOrder = get_geneOrder(geneCnt_human)
    else:
        geneOrder = get_geneOrder(geneCnt_mouse)
    print(geneOrder)

    # Sort human gene count (drop=False keeps the SYMBOL column as before)
    geneCnt_h_srt = geneCnt_human.set_index('SYMBOL', drop=False).reindex(geneOrder)
    geneCnt_h_srt['Count_r'] = geneCnt_h_srt.Count * -1

    # Sort mouse gene count
    geneCnt_m_srt = geneCnt_mouse.set_index('SYMBOL', drop=False).reindex(geneOrder)
    geneCnt_m_srt['Count_r'] = geneCnt_m_srt.Count * -1

    # Sort the heatmap columns by this gene list
    human_sub_srt = df_sub.reindex(columns=geneOrder)

    return geneCnt_h_srt, geneCnt_m_srt, human_sub_srt

In [39]:
def sort_comut_human(human_sub_srt,geneCnt_h_srt,myDict):
    """Order samples like a coMut waterfall, grouped by model/cohort.

    Steps:
      1. cap every score above 1 at 1;
      2. sort samples by the gene columns (in ``geneCnt_h_srt.index``
         order), descending;
      3. regroup that order by model, models in order of first appearance
         in ``human_sub_srt.index``.

    Parameters
    ----------
    human_sub_srt : DataFrame, one row per sample, one column per gene
    geneCnt_h_srt : DataFrame whose index lists the gene columns to sort by
    myDict : mapping from sample label to model/cohort label

    Returns
    -------
    DataFrame
        ``human_sub_srt`` with its original values, rows in the new order.

    Raises
    ------
    ValueError
        If a sample has no entry in ``myDict`` (this used to surface as an
        IndexError further down).

    Notes
    -----
    The gene list now comes from the ``geneCnt_h_srt`` argument; the body
    used to read the notebook global ``geneCnt_m_srt`` instead.
    NOTE(review): only scores > 1 are capped, so negative scores stay
    negative and sort after 0 -- confirm that is intended.
    """
    print('sort comut')

    # 1. cap scores at 1 (on a new frame, the input is untouched)
    df_bi = human_sub_srt.mask(human_sub_srt > 1, 1)

    models = pd.Series(human_sub_srt.index).map(myDict)
    unmapped = human_sub_srt.index[models.isnull().values].tolist()
    if unmapped:
        raise ValueError('samples missing from myDict: %r' % (unmapped,))
    myModels = models.unique()

    df_bi['models'] = models.tolist()
    df_bi = df_bi.set_index([df_bi.models, df_bi.index])

    # 2. sort by genes; each index entry is a (model, sample) pair
    myGenes = geneCnt_h_srt.index.tolist()
    sample_sorted = df_bi.sort_values(myGenes, ascending=False).index.tolist()
    sample_srtDf = pd.DataFrame(sample_sorted, columns=['Model', 'Sample'])

    # 3. regroup by model, keeping the within-model order from step 2
    newOrder = []
    for m in myModels:
        print(m)
        newOrder.extend(sample_srtDf.Sample[sample_srtDf.Model == m].unique())
        print(len(newOrder))

    mySorted = human_sub_srt.reindex(newOrder)
    return mySorted

In [38]:
def sort_comut_human_mixed(human_sub_srt,geneCnt_h_srt,myDict):
    """Order samples like a coMut waterfall without grouping by model.

    Scores above 1 are capped at 1, then samples are sorted by the gene
    columns (in ``geneCnt_h_srt.index`` order), descending.

    Parameters
    ----------
    human_sub_srt : DataFrame, one row per sample, one column per gene
    geneCnt_h_srt : DataFrame whose index lists the gene columns to sort by
    myDict : unused; accepted so existing calls keep working. It only fed a
        per-model debug table that never influenced the returned order.

    Returns
    -------
    DataFrame
        ``human_sub_srt`` with its original values, rows in the new order.

    Notes
    -----
    The gene list now comes from the ``geneCnt_h_srt`` argument; the body
    used to read the notebook global ``geneCnt_m_srt`` instead.
    NOTE(review): only scores > 1 are capped, so negative scores stay
    negative and sort after 0 -- confirm that is intended.
    """
    print('sort comut')

    # 1. cap scores at 1 (on a new frame, the input is untouched)
    df_bi = human_sub_srt.mask(human_sub_srt > 1, 1)

    # 2. sort by genes
    myGenes = geneCnt_h_srt.index.tolist()
    newOrder = df_bi.sort_values(myGenes, ascending=False).index.tolist()

    mySorted = human_sub_srt.reindex(newOrder)
    return mySorted

### Mouse Plotting Functions

In [1]:
def get_sigGenes(pValDf, pThres, topN):
    """Collect the ``topN`` smallest-p-value genes from each of the first four columns.

    For each of the first four columns of ``pValDf`` the function prints
    the column label and the True/False counts of ``p < pThres``, then
    takes the index labels of the ``topN`` smallest values.

    NOTE(review): ``pThres`` is only used for the printed counts; genes are
    selected by rank alone, so a gene above the threshold can be returned.
    Confirm whether the threshold was meant to filter.

    Parameters
    ----------
    pValDf : DataFrame indexed by gene, first four columns holding p-values
    pThres : float
    topN : int

    Returns
    -------
    list
        Index labels, column by column; a gene picked for several columns
        appears once per column.
    """
    sigLs = []
    for i in pValDf.columns[0:4]:
        pvals = pValDf.loc[:, i]
        print(i)
        print((pvals < pThres).value_counts())
        # .iloc keeps the slice positional whatever the index dtype is.
        sigLs.extend(pvals.sort_values().iloc[0:topN].index)

    return sigLs

def subset_dfs(myGenes, pValDf, geneCnt_human, geneCnt_mouse, mouse_mutScore):
    """Restrict the p-value, count and score tables to the genes in ``myGenes``.

    A gene matches when its all-lowercase or all-uppercase spelling equals
    a ``Genes`` value (p-value table), a ``SYMBOL`` value (count tables) or
    a column label (score table).

    Parameters
    ----------
    myGenes : iterable of str
    pValDf : DataFrame with a ``Genes`` column
    geneCnt_human, geneCnt_mouse : DataFrame with a ``SYMBOL`` column
    mouse_mutScore : DataFrame whose column labels are gene symbols

    Returns
    -------
    (pValDf_sub, geneCnt_h_sub, geneCnt_m_sub, mouse_sub)
        Independent copies. ``pValDf_sub`` is indexed by the upper-cased
        ``Genes`` values.
    """
    wanted = [g.lower() for g in myGenes] + [g.upper() for g in myGenes]

    # pVal df: copy before re-indexing so the caller's frame is never touched
    pValDf_sub = pValDf[pValDf.Genes.isin(wanted)].copy()
    pValDf_sub.index = [g.upper() for g in pValDf_sub.Genes]

    print(len(myGenes))
    print('Sub pval table: %s %s' % (pValDf.shape, pValDf_sub.shape))

    # Gene alteration sample count tables
    geneCnt_h_sub = geneCnt_human[geneCnt_human.SYMBOL.isin(wanted)].copy()
    geneCnt_m_sub = geneCnt_mouse[geneCnt_mouse.SYMBOL.isin(wanted)].copy()
    print('Sub count tables in human and mouse: %s %s'
          % (geneCnt_h_sub.shape, geneCnt_m_sub.shape))

    # Main score table: keep only the requested gene columns
    mouse_sub = mouse_mutScore.loc[:, mouse_mutScore.columns.isin(wanted)].copy()
    print('Sub main table: %s' % (mouse_sub.shape,))

    return pValDf_sub, geneCnt_h_sub, geneCnt_m_sub, mouse_sub

def get_geneOrder(geneCnt):
    """Determine the order of genes: SYMBOL values by descending ``Count``.

    The table can be either the human or the mouse gene-count table; it
    needs ``SYMBOL`` and ``Count`` columns and is not modified.

    Returns
    -------
    list
        SYMBOL values, highest Count first.
    """
    # The former Count_r helper column was computed but never read.
    return geneCnt.sort_values('Count', ascending=False).SYMBOL.tolist()

def sort_dfs(geneCnt_human, geneCnt_mouse,mouse_sub,pValDf_sub,species):
    """Put the count, score and p-value tables into one common gene order.

    The order comes from ``get_geneOrder`` applied to the human table when
    ``species == 'human'`` and to the mouse table for any other value.

    Parameters
    ----------
    geneCnt_human, geneCnt_mouse : DataFrame with ``SYMBOL`` and ``Count``
    mouse_sub : DataFrame whose column labels are gene symbols
    pValDf_sub : DataFrame indexed by gene, with one column per mouse model
    species : str

    Returns
    -------
    (geneCnt_h_srt, geneCnt_m_srt, mouse_sub_srt, pValDf_sub_srt)
        The count tables are indexed by SYMBOL, follow the gene order and
        carry ``Count_r`` (= -Count). ``mouse_sub_srt`` has its columns in
        the gene order. ``pValDf_sub_srt`` has genes as rows and the
        columns TAK1, MUP, STAM, DEN, with missing values set to 1.

    Notes
    -----
    * The inputs are left untouched (the original overwrote their index).
    * ``reindex`` is used instead of ``.loc[list]``: a gene present in only
      one table yields NaN (p-value 1 after ``fillna``) rather than the
      KeyError current pandas raises for missing labels. SYMBOL values are
      assumed unique.
    """
    if species == 'human':
        geneOrder = get_geneOrder(geneCnt_human)
    else:
        geneOrder = get_geneOrder(geneCnt_mouse)
    print(geneOrder)

    # Sort human gene count (drop=False keeps the SYMBOL column as before)
    geneCnt_h_srt = geneCnt_human.set_index('SYMBOL', drop=False).reindex(geneOrder)
    geneCnt_h_srt['Count_r'] = geneCnt_h_srt.Count * -1

    # Sort mouse gene count
    geneCnt_m_srt = geneCnt_mouse.set_index('SYMBOL', drop=False).reindex(geneOrder)
    geneCnt_m_srt['Count_r'] = geneCnt_m_srt.Count * -1

    # Sort the heatmap columns by this gene list
    mouse_sub_srt = mouse_sub.reindex(columns=geneOrder)

    # Mouse model order (local on purpose, shadows any notebook-level name)
    model_order = ['TAK1','MUP','STAM','DEN']
    # Sort p-value table; genes without a p-value get 1
    pValDf_sub_srt = pValDf_sub.reindex(index=geneOrder, columns=model_order).fillna(1)

    return geneCnt_h_srt, geneCnt_m_srt, mouse_sub_srt, pValDf_sub_srt

In [5]:
'''
sort samples and mutations like coMut waterfall
'''
def sort_comut(mouse_sub_srt, geneCnt_m_srt):
    #1. change to binary
    #2. sort by genes
    mouse_bi = mouse_sub_srt.copy()

    mouse_bi[mouse_bi > 1] = 1

    models = ['TAK1']*9 + ['MUP']*9 + ['STAM']*19 + ['DEN']*19 
    mouse_bi['models'] = models
    mouse_bi = mouse_bi.set_index([mouse_bi.models,mouse_bi.index])

    myGenes = geneCnt_m_srt.index.tolist()
    print myGenes
    mouse_sub_srtS = mouse_bi.sort_values(myGenes,ascending=False).T
    sample_sorted = mouse_sub_srtS.columns.tolist()

    # Get new order sorted by models
    sample_srtDf = pd.DataFrame(sample_sorted)
    sample_srtDf.columns = ['Model','Sample']
    tmp = sample_srtDf.groupby(['Model']).Sample.unique().reset_index()
    newOrder=list()
    for m in ['TAK1','MUP','STAM','DEN']:
        myList = tmp.Sample[tmp.Model == m].tolist()
        for s in myList[0]:
            #print s
            newOrder.append(s)

    mySorted = mouse_sub_srt.reindex(newOrder)
    return mySorted


In [40]:

def sort_comut_human_mixed(human_sub_srt,geneCnt_h_srt,myDict):
    """Order samples like a coMut waterfall without grouping by model.

    Scores above 1 are capped at 1, then samples are sorted by the gene
    columns (in ``geneCnt_h_srt.index`` order), descending.

    Parameters
    ----------
    human_sub_srt : DataFrame, one row per sample, one column per gene
    geneCnt_h_srt : DataFrame whose index lists the gene columns to sort by
    myDict : unused; accepted so existing calls keep working. It only fed a
        per-model debug table that never influenced the returned order.

    Returns
    -------
    DataFrame
        ``human_sub_srt`` with its original values, rows in the new order.

    Notes
    -----
    The gene list now comes from the ``geneCnt_h_srt`` argument; the body
    used to read the notebook global ``geneCnt_m_srt`` instead.
    NOTE(review): this function is defined more than once in the notebook;
    the definition executed last is the one in effect.
    NOTE(review): only scores > 1 are capped, so negative scores stay
    negative and sort after 0 -- confirm that is intended.
    """
    print('sort comut')

    # 1. cap scores at 1 (on a new frame, the input is untouched)
    df_bi = human_sub_srt.mask(human_sub_srt > 1, 1)

    # 2. sort by genes
    myGenes = geneCnt_h_srt.index.tolist()
    newOrder = df_bi.sort_values(myGenes, ascending=False).index.tolist()

    mySorted = human_sub_srt.reindex(newOrder)
    return mySorted

### Plot functions

In [2]:
def plot_final(heatmap_df, pval_df, top_count, left_count_human, left_count_mouse,figname):
    """Draw the coMut-style summary figure and save it to ``figname``.

    Panels, all attached to the central heatmap axes:
      * centre - heatmap of ``heatmap_df.T`` using the notebook-level ``myCmap``;
      * right  - ``-log10`` of ``pval_df``, columns in notebook-level ``model_order``;
      * top    - bars of ``top_count.Total`` overdrawn by ``top_count.nonSyn_Count``;
      * bottom - one unit bar per sample, coloured by notebook-level ``modelColors``;
      * left   - horizontal bars of ``Count_r`` for mouse and for human.

    Parameters
    ----------
    heatmap_df : DataFrame; its columns become the heatmap rows
    pval_df : DataFrame with one column per entry of ``model_order``
    top_count : DataFrame with ``SAMPLE``, ``Total`` and ``nonSyn_Count``
    left_count_human, left_count_mouse : DataFrame with ``Count_r``
    figname : output path passed to ``savefig``

    Returns None. Changes global seaborn style and matplotlib font rcParams.

    Notes
    -----
    * The gene count now comes from ``heatmap_df``; it used to be read from
      the notebook global ``mySorted``.
    * The p-value panel draws the frame reordered to ``model_order``, so
      the image columns always match the tick labels.
    """
    import matplotlib
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    numGenes = heatmap_df.columns.nunique()
    numSamples = top_count.SAMPLE.nunique()
    # range works on Python 2 and 3; xrange is Python-2-only.
    sample_pos = range(numSamples)
    gene_pos = range(numGenes)

    sns.set(font_scale=0.8)
    plt.figure(figsize=(10,numGenes*0.18))
    ax = sns.heatmap(heatmap_df.T,cmap=myCmap,center=0,linewidths=0.1,cbar = 0)

    ax.set_xticklabels('')
    ax.set_ylabel('')

    # Create the satellite axes around the heatmap; the seaborn style is
    # switched before each call so every new axes picks up the wanted style.
    divider = make_axes_locatable(ax)
    sns.set_style('whitegrid')
    axHistx = divider.append_axes("top", size=0.5, pad=0.1, sharex=ax) # Top
    sns.set_style('white')
    axHisty = divider.append_axes("right", size=0.5, pad=0.1) #Right
    sns.set_style('whitegrid')
    axHistz = divider.append_axes("bottom", size=0.1, pad=0.05, sharex=ax) # Bottom
    axHistk = divider.append_axes("left", size=0.7, pad=0.7, sharey=ax) # Left 1
    axHistk2 = divider.append_axes("left", size=0.7, pad=0.2, sharey=ax) # Left 2

    # Plot 1 - Right, P-value
    sns.set_style('white')
    myPvals_srt = pval_df.loc[:,model_order]
    axHisty.imshow(-np.log10(myPvals_srt), cmap='OrRd', interpolation='nearest', aspect='auto')
    axHisty.yaxis.set_visible(False)
    axHisty.set_xticks([0,1,2,3])
    axHisty.set_xlabel("-log10(p-val)")
    axHisty.set_xticklabels(model_order,rotation=90)

    # Plot 2 - Top, stacked look: total first, non-synonymous drawn over it.
    # NOTE(review): bars follow top_count's row order; this assumes
    # top_count is already in the same sample order as heatmap_df.
    axHistx.bar(sample_pos,top_count.Total, color = "green")
    axHistx.bar(sample_pos,top_count.nonSyn_Count, color = "#0000A3")

    axHistx.xaxis.set_visible(False)
    axHistx.set_ylabel("# Mutations")

    # Plot 3 - plot on bottom, sample label
    # NOTE(review): modelColors holds a fixed number of entries, so it
    # presumably must match the number of samples -- confirm.
    axHistz.bar(sample_pos,[1]*numSamples, color = modelColors,width=1)
    axHistz.xaxis.set_visible(False)
    axHistz.yaxis.set_visible(False)

    # Plot 4 - total alterations (Mouse)
    # Ticks sit at raw (negated) counts and are relabelled as fractions.
    # NOTE(review): the count-to-fraction mapping is hard-coded
    # (-20 -> 0.4) and only holds for the cohort size it was tuned for.
    axHistk.barh(gene_pos,left_count_mouse.Count_r)
    axHistk.yaxis.set_visible(False)
    xCntTicks = [-20,-15,-10,-5,0]
    xCntTicksLabel = [0.4,0.3,0.2,0.1,0]
    axHistk.set_xticks(xCntTicks)
    axHistk.set_xticklabels(xCntTicksLabel)
    axHistk.set_xlabel("% Alteration \n (Mouse)")

    # Plot 5 - total alterations (Human); same hard-coded relabelling
    # (-485 -> 0.4).
    axHistk2.barh(gene_pos,left_count_human.Count_r)
    axHistk2.yaxis.set_visible(False)
    x_pct = [-485,-364, -242, -121,0]
    axHistk2.set_xticks(x_pct)
    axHistk2.set_xticklabels([0.4,0.3,0.2,0.1,0])
    axHistk2.set_xlabel("% Alteration \n (Human)")

    # fonttype 42 selects TrueType font embedding for PDF/PS output.
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    ax.figure.savefig(figname, transparent=True)
    

In [41]:
def plot_final_small(heatmap_df, pval_df, top_count, left_count_human, left_count_mouse,figname):
    """Draw the compact coMut-style summary figure and save it to ``figname``.

    Same panel layout as the full-size figure, with a taller row height
    (0.3 per gene), a shorter top panel, and different hard-coded tick
    positions on the two left panels.

    Panels, all attached to the central heatmap axes:
      * centre - heatmap of ``heatmap_df.T`` using the notebook-level ``myCmap``;
      * right  - ``-log10`` of ``pval_df``, columns in notebook-level ``model_order``;
      * top    - bars of ``top_count.Total`` overdrawn by ``top_count.nonSyn_Count``;
      * bottom - one unit bar per sample, coloured by notebook-level ``modelColors``;
      * left   - horizontal bars of ``Count_r`` for mouse and for human.

    Parameters
    ----------
    heatmap_df : DataFrame; its columns become the heatmap rows
    pval_df : DataFrame with one column per entry of ``model_order``
    top_count : DataFrame with ``SAMPLE``, ``Total`` and ``nonSyn_Count``
    left_count_human, left_count_mouse : DataFrame with ``Count_r``
    figname : output path passed to ``savefig``

    Returns None. Changes global seaborn style and matplotlib font rcParams.

    Notes
    -----
    * The gene count now comes from ``heatmap_df``; it used to be read from
      the notebook global ``mySorted``.
    * The p-value panel draws the frame reordered to ``model_order``, so
      the image columns always match the tick labels.
    """
    import matplotlib
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    numGenes = heatmap_df.columns.nunique()
    numSamples = top_count.SAMPLE.nunique()
    # range works on Python 2 and 3; xrange is Python-2-only.
    sample_pos = range(numSamples)
    gene_pos = range(numGenes)

    sns.set(font_scale=0.8)
    plt.figure(figsize=(10,numGenes*0.3))
    ax = sns.heatmap(heatmap_df.T,cmap=myCmap,center=0,linewidths=0.1,cbar = 0)

    ax.set_xticklabels('')
    ax.set_ylabel('')

    # Create the satellite axes around the heatmap; the seaborn style is
    # switched before each call so every new axes picks up the wanted style.
    divider = make_axes_locatable(ax)
    sns.set_style('whitegrid')
    axHistx = divider.append_axes("top", size=0.3, pad=0.1, sharex=ax) # Top
    sns.set_style('white')
    axHisty = divider.append_axes("right", size=0.5, pad=0.1) #Right
    sns.set_style('whitegrid')
    axHistz = divider.append_axes("bottom", size=0.1, pad=0.05, sharex=ax) # Bottom
    axHistk = divider.append_axes("left", size=0.7, pad=0.7, sharey=ax) # Left 1
    axHistk2 = divider.append_axes("left", size=0.7, pad=0.2, sharey=ax) # Left 2

    # Plot 1 - Right, P-value
    sns.set_style('white')
    myPvals_srt = pval_df.loc[:,model_order]
    axHisty.imshow(-np.log10(myPvals_srt), cmap='OrRd', interpolation='nearest', aspect='auto')
    axHisty.yaxis.set_visible(False)
    axHisty.set_xticks([0,1,2,3])
    axHisty.set_xlabel("-log10(p-val)")
    axHisty.set_xticklabels(model_order,rotation=90)

    # Plot 2 - Top, stacked look: total first, non-synonymous drawn over it.
    # NOTE(review): bars follow top_count's row order; this assumes
    # top_count is already in the same sample order as heatmap_df.
    axHistx.bar(sample_pos,top_count.Total, color = "green")
    axHistx.bar(sample_pos,top_count.nonSyn_Count, color = "#0000A3")

    axHistx.xaxis.set_visible(False)
    axHistx.set_ylabel("# Mutations")

    # Plot 3 - plot on bottom, sample label
    # NOTE(review): modelColors holds a fixed number of entries, so it
    # presumably must match the number of samples -- confirm.
    axHistz.bar(sample_pos,[1]*numSamples, color = modelColors,width=1)
    axHistz.xaxis.set_visible(False)
    axHistz.yaxis.set_visible(False)

    # Plot 4 - total alterations (Mouse)
    # Ticks sit at raw (negated) counts and are relabelled as fractions.
    # NOTE(review): the count-to-fraction mapping is hard-coded
    # (-30 -> 0.6) and only holds for the cohort size it was tuned for.
    axHistk.barh(gene_pos,left_count_mouse.Count_r)
    axHistk.yaxis.set_visible(False)
    xCntTicks = [-30, -20,-10,0]
    xCntTicksLabel = [0.6,0.4,0.2,0]
    axHistk.set_xticks(xCntTicks)
    axHistk.set_xticklabels(xCntTicksLabel)
    axHistk.set_xlabel("% Alteration \n (Mouse)")

    # Plot 5 - total alterations (Human); same hard-coded relabelling
    # (-300 -> 0.3).
    axHistk2.barh(gene_pos,left_count_human.Count_r)
    axHistk2.yaxis.set_visible(False)
    axHistk2.set_xticks([-300,-200,-100,0])
    axHistk2.set_xticklabels([0.3,0.2,0.1,0])
    axHistk2.set_xlabel("% Alteration \n (Human)")

    # fonttype 42 selects TrueType font embedding for PDF/PS output.
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    ax.figure.savefig(figname, transparent=True)

In [2]:
def plot_final_small_human(heatmap_df, top_count, left_count_human, left_count_mouse,humanToProj_dict,figname):
    """Draw the compact human coMut-style figure and save it to ``figname``.

    Panels, all attached to the central heatmap axes (no p-value panel):
      * centre - heatmap of ``heatmap_df.T`` using the notebook-level ``myCmap``;
      * top    - bars of ``Total`` overdrawn by ``nonSyn_Count``;
      * bottom - one unit bar per sample, coloured by project;
      * left   - horizontal bars of ``Count_r`` for mouse and for human.

    Parameters
    ----------
    heatmap_df : DataFrame indexed by sample; its columns become the
        heatmap rows
    top_count : DataFrame with ``SAMPLE``, ``Total`` and ``nonSyn_Count``
    left_count_human, left_count_mouse : DataFrame with ``Count_r``
    humanToProj_dict : mapping from sample label to one of the project
        codes 'LICA-CN', 'LICA-FR', 'LIHC-US', 'LINC-JP'
    figname : output path passed to ``savefig``

    Returns None. Changes global seaborn style and matplotlib font rcParams.

    Notes
    -----
    * The gene count now comes from ``heatmap_df``; it used to be read from
      the notebook global ``mySorted``.
    * ``top_count`` is aligned to ``heatmap_df.index`` before the top bars
      are drawn, so each bar sits over its own heatmap column. Previously
      only the colour strip used that order. The result is unchanged when
      the caller already passes ``top_count`` in heatmap order.
    * Colours are handed to matplotlib as a plain list, not a pandas Series.
    """
    import matplotlib
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    numGenes = heatmap_df.columns.nunique()
    # range works on Python 2 and 3; xrange is Python-2-only.
    gene_pos = range(numGenes)

    # Per-sample counts in heatmap column order; a sample missing from
    # top_count gives a NaN row. SAMPLE values are assumed unique.
    top_count_srt = top_count.set_index(['SAMPLE']).reindex(heatmap_df.index)
    numSamples = len(top_count_srt)
    sample_pos = range(numSamples)

    sns.set(font_scale=0.8)
    plt.figure(figsize=(10,numGenes*0.23))
    ax = sns.heatmap(heatmap_df.T,cmap=myCmap,center=0,linewidths=0.1,cbar = 0)

    ax.set_xticklabels('')
    ax.set_ylabel('')

    # Create the satellite axes around the heatmap.
    divider = make_axes_locatable(ax)
    sns.set_style('whitegrid')
    axHistx = divider.append_axes("top", size=0.3, pad=0.1, sharex=ax) # Top
    sns.set_style('whitegrid')
    axHistz = divider.append_axes("bottom", size=0.1, pad=0.05, sharex=ax) # Bottom
    axHistk = divider.append_axes("left", size=0.7, pad=0.7, sharey=ax) # Left 1
    axHistk2 = divider.append_axes("left", size=0.7, pad=0.2, sharey=ax) # Left 2

    # Plot 2 - Top, stacked look: total first, non-synonymous drawn over it
    axHistx.bar(sample_pos,top_count_srt.Total, color = "green")
    axHistx.bar(sample_pos,top_count_srt.nonSyn_Count, color = "#0000A3")

    axHistx.xaxis.set_visible(False)
    axHistx.set_ylabel("# Mutations")

    # Plot 3 - plot on bottom, sample label coloured by project
    colors = sns.color_palette("Set2", 4)
    proj_to_colors = {'LICA-CN':colors[0],'LICA-FR':colors[1],'LIHC-US':colors[2],'LINC-JP':colors[3]}
    cohortColors = (pd.Series(top_count_srt.index)
                    .map(humanToProj_dict)
                    .map(proj_to_colors)
                    .tolist())

    axHistz.bar(sample_pos,[1]*numSamples, color = cohortColors,width=1)
    axHistz.xaxis.set_visible(False)
    axHistz.yaxis.set_visible(False)

    # Plot 4 - total alterations (Mouse)
    # Ticks sit at raw (negated) counts and are relabelled as fractions.
    # NOTE(review): the count-to-fraction mapping is hard-coded
    # (-40 -> 0.4) and only holds for the cohort size it was tuned for.
    axHistk.barh(gene_pos,left_count_mouse.Count_r)
    axHistk.yaxis.set_visible(False)
    xCntTicks = [-40,-30, -20,-10,0]
    xCntTicksLabel = [0.4,0.3,0.2,0.1,0]
    axHistk.set_xticks(xCntTicks)
    axHistk.set_xticklabels(xCntTicksLabel)
    axHistk.set_xlabel("% Alteration \n (Mouse)")

    # Plot 5 - total alterations (Human); same hard-coded relabelling
    # (-480 -> 0.4).
    axHistk2.barh(gene_pos,left_count_human.Count_r)
    axHistk2.yaxis.set_visible(False)
    axHistk2.set_xticks([-480,-360,-240,-120,0])
    axHistk2.set_xticklabels([0.4,0.3,0.2,0.1,0])
    axHistk2.set_xlabel("% Alteration \n (Human)")

    # fonttype 42 selects TrueType font embedding for PDF/PS output.
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    ax.figure.savefig(figname, transparent=True)