# **Kidney-specific Proteomic and Transcriptomic DEG Analysis - Categorical Heatmap**

## All differentially expressed genes (DEGs) observed in the proteome and transcriptome kidney-specific datasets were scored using the following criteria:
### - Each DEG was assigned a score of 1 each time it was upregulated or -1 each time it was downregulated. The resulting sum of scores for each DEG was then calculated and multiplied by the number of times the DEG was observed in the kidney-specific proteome and transcriptome datasets. This resulted in a final score for each DEG that was used to rank the DEGs in the categorical heatmaps below.

### **Heatmaps**
### The first heatmap contains the highest-ranked upregulated genes in the kidney-specific proteome and transcriptome datasets, with the additional datasets (PTM, epigenome, and plasma/exosomes) plotted as well. All genes with a score of 9 or higher (excluding genes whose score is exactly 12) are shown.
### The second heatmap contains the highest-ranked downregulated genes in the kidney-specific proteome and transcriptome datasets, with the additional datasets (PTM, epigenome, and plasma/exosomes) plotted as well. All genes with a score of -9 or lower (excluding genes whose score is exactly -12) are shown.

In [None]:
## Load packages
import pandas as pd
import numpy as np
import palettable
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import rcParams
from comut import comut

In [None]:
# Load the proteomic DEG tables from CSV.
# Abbreviations used in the variable names:
#   IP = immpostvpre
#   PP = postpostvpre
#   pl = plasma
#   ex = exosomes
(
    df_BNL3,
    df_I4_ex_IP,
    df_I4_pl_IP,
    df_NSRL_F,
    df_NSRL_M,
    df_RR1,
    df_RR3,
    df_RR10,
    df_RR19,
) = map(pd.read_csv, (
    'BNL3_DEG_output_genome.csv',
    'Inspiration4_exosomes_immpostVpre_DEG_output_genome.csv',
    'Inspiration4_plasma_immpostVpre_DEG_output_genome.csv',
    'NSRL22A_F_DEG_output_genome.csv',
    'NSRL22A_M_DEG_output_genome.csv',
    'RR1_DEG_output_genome.csv',
    'RR3_DEG_output_genome.csv',
    'RR10_DEG_output_genome.csv',
    'RR19_DEG_output_genome.csv',
))

In [None]:
# Load the transcriptomic (RNA) DEG tables from CSV, one dataframe per file.
(
    df_RR1_R,
    df_RR3_R,
    df_RR7_C3_25,
    df_RR7_C3_75,
    df_RR7_C5_25,
    df_RR7_C5_75,
    df_RR10_R,
    df_RR23,
    df_MHU1_MGGC,
    df_MHU3,
    df_JAXA_PF1,
    df_JAXA_PF4,
) = map(pd.read_csv, (
    'RR1_RNA_DEG_output_genome.csv',
    'RR3_RNA_DEG_output_genome.csv',
    'RR7_RNA_C3HHeJ_25d_DEG_output_genome_02.csv',
    'RR7_RNA_C3HHeJ_75d_DEG_output_genome_02.csv',
    'RR7_RNA_C57BL6J_25d_DEG_output_genome_02.csv',
    'RR7_RNA_C57BL6J_75d_DEG_output_genome_02.csv',
    'RR10_RNA_DEG_output_genome.csv',
    'RR23_RNA_DEG_output_genome_adjp.csv',
    'MHU1_RNA_MGvsGC_DEG_output_genome.csv',
    'MHU3_RNA_DEG_output_genome.csv',
    'JAXA_RNA_prevsflight1_DEG_output_genome.csv',
    'JAXA_RNA_prevsflight4_DEG_output_genome.csv',
))

In [None]:
# Load the PTM DEG table (the RR10 'Phos' file).
df_RR10_PTM = pd.read_csv(filepath_or_buffer='RR10_Phos_DEG_output_genome.csv')

In [None]:
# Load the two epigenomic DEG tables.
df_RR1_E, df_RR3_E = (
    pd.read_csv(csv_name)
    for csv_name in ('RR1_Epi_DEG_output_genome.csv', 'RR3_Epi_DEG_output_genome.csv')
)

In [None]:
# Collect every DEG dataframe in one list (this is also the concat order).
df_list_de2 = [
    # proteomic tables
    df_BNL3,
    df_I4_ex_IP,
    df_I4_pl_IP,
    df_NSRL_F,
    df_NSRL_M,
    df_RR1,
    df_RR3,
    df_RR10,
    df_RR19,
    # transcriptomic tables
    df_RR1_R,
    df_RR3_R,
    df_RR7_C3_25,
    df_RR7_C3_75,
    df_RR7_C5_25,
    df_RR7_C5_75,
    df_RR10_R,
    df_RR23,
    df_MHU1_MGGC,
    df_MHU3,
    df_JAXA_PF1,
    df_JAXA_PF4,
    # PTM table
    df_RR10_PTM,
    # epigenomic tables
    df_RR1_E,
    df_RR3_E,
]

In [None]:
# Tag every dataframe in df_list_de2 with a constant 'Sample' column.
# The table below pairs each dataframe object with its sample name; frames are
# matched by identity (`is`), and a frame not listed here is left untouched.
sample_name_table = (
    (df_BNL3, 'BNL3_Protein'),
    (df_I4_ex_IP, 'Inspiration4_exosomes_immpostVpre_Protein'),
    (df_I4_pl_IP, 'Inspiration4_plasma_immpostVpre_Protein'),
    (df_NSRL_F, 'NSRL22A_II_F_vs_I_F_Protein'),
    (df_NSRL_M, 'NSRL22A_II_M_vs_I_M_Protein'),
    (df_RR1, 'RR1_Protein'),
    (df_RR3, 'RR3_Protein'),
    (df_RR10, 'RR10_Protein'),
    (df_RR19, 'RR19_Protein'),
    (df_RR1_R, 'RR1_RNA'),
    (df_RR3_R, 'RR3_RNA'),
    (df_RR7_C3_25, 'RR7_C3HHeJ_GCvsSF_25d_RNA'),
    (df_RR7_C3_75, 'RR7_C3HHeJ_GCvsSF_75d_RNA'),
    (df_RR7_C5_25, 'RR7_C57BL6J_GCvsSF_25d_RNA'),
    (df_RR7_C5_75, 'RR7_C57BL6J_GCvsSF_75d_RNA'),
    (df_RR10_R, 'RR10_RNA'),
    (df_RR23, 'RR23_RNA'),
    (df_MHU1_MGGC, 'MHU1_MGvGC_RNA'),
    (df_MHU3, 'MHU3_RNA'),
    (df_JAXA_PF1, 'JAXA_preVflight1_RNA'),
    (df_JAXA_PF4, 'JAXA_preVflight4_RNA'),
    (df_RR10_PTM, 'RR10_Phos'),
    (df_RR1_E, 'RR1_Epi'),
    (df_RR3_E, 'RR3_Epi'),
)

for df in df_list_de2:
    for frame, sample_name in sample_name_table:
        if df is frame:
            df['Sample'] = sample_name
            break

In [None]:
# Flip the sign of every logFC value in the MHU1_MGvsGC_RNA table.
# NOTE: the column is overwritten in place, so running this cell a second time
# flips the signs back again.
df_MHU1_MGGC['logFC'] = df_MHU1_MGGC['logFC'].mul(-1)

In [None]:
# Stack all DEG tables into one long dataframe.
# ignore_index=True gives the result a fresh 0..n-1 row index; without it each
# file's own row labels would be carried over and repeat across the stack,
# which makes any later label-based alignment ambiguous.
df_all = pd.concat(df_list_de2, ignore_index=True)

In [None]:
# Upper-case every gene name so the 'Gene' column uses a single letter case.
upper_case_genes = df_all['Gene'].str.upper()
df_all['Gene'] = upper_case_genes

In [None]:
# Load the per-sample metadata table.
df_meta = pd.read_csv(filepath_or_buffer='sample_metadata.csv')

In [None]:
# Lookup table: sample name -> tissue type (a later duplicate sample row wins).
sample_dict = {
    sample: tissue
    for sample, tissue in zip(df_meta['Sample'], df_meta['Tissue'])
}

In [None]:
# Lookup table: sample name -> omic type (a later duplicate sample row wins).
sample_dict_02 = {
    sample: omic
    for sample, omic in zip(df_meta['Sample'], df_meta['Omic'])
}

In [None]:
# Annotate each row with its sample's tissue type.
# Samples missing from sample_dict get NaN.
tissue_per_row = df_all['Sample'].map(sample_dict)
df_all['Tissue'] = tissue_per_row

In [None]:
# Annotate each row with its sample's omic type.
# Samples missing from sample_dict_02 get NaN.
omic_per_row = df_all['Sample'].map(sample_dict_02)
df_all['Omic'] = omic_per_row

In [None]:
# Reassign the dataframe variable.
# NOTE: plain assignment binds a second name to the same DataFrame object;
# no copy is made here.
df_deg = df_all

In [None]:
# Drop the PTM and epigenome rows; every other Omic value is kept.
excluded_omics = ['PTM (Phospho)', 'Epigenome']
df_deg = df_deg[~df_deg['Omic'].isin(excluded_omics)]

In [None]:
# Keep only the rows whose tissue is 'Kidney'.
is_kidney = df_deg['Tissue'].eq('Kidney')
df_kid = df_deg.loc[is_kidney]

In [None]:
# Only the gene name and its log fold change are needed for scoring.
df_kid_gene = df_kid.loc[:, ['Gene', 'logFC']]

In [None]:
# Order the rows alphabetically by gene name.
df_kid_gene = df_kid_gene.sort_values('Gene')

In [None]:
# For every row, store how many kidney rows share that row's gene.
rows_per_gene = df_kid_gene.groupby('Gene')['Gene'].transform('count')
df_kid_gene['count'] = rows_per_gene

In [None]:
# Most frequently observed genes first.
df_kid_gene = df_kid_gene.sort_values('count', ascending=False)

In [None]:
# Label each row by the sign of its logFC.
# Rows with logFC equal to 0 (or missing) keep a NaN Direction.
is_up = df_kid_gene['logFC'] > 0
is_down = df_kid_gene['logFC'] < 0
df_kid_gene.loc[is_up, 'Direction'] = 'Up'
df_kid_gene.loc[is_down, 'Direction'] = 'Down'

In [None]:
# Per-row indicator: 1 where Direction is 'Up', 0 where it is 'Down'
# (NaN where Direction is missing).
up_flag = df_kid_gene['Direction'].str.count('Up')
df_kid_gene['Up'] = up_flag

In [None]:
# Per-row indicator with a negative sign: -1 where Direction is 'Down',
# 0 where it is 'Up' (NaN where Direction is missing).
df_kid_gene['Down'] = df_kid_gene['Direction'].str.count('Down') * -1

In [None]:
# Row-wise score: +1 for an upregulated row, -1 for a downregulated row.
# (No grouping happens here; the per-gene sum is taken in the next step.)
df_kid_gene['Score'] = df_kid_gene['Up'].add(df_kid_gene['Down'])

In [None]:
# Net score per gene, repeated on every row of that gene.
net_score_per_gene = df_kid_gene.groupby('Gene')['Score'].transform('sum')
df_kid_gene['Score_sum'] = net_score_per_gene

In [None]:
# Final ranking score = (number of rows for the gene) x (net up/down score).
df_kid_gene['Product_score'] = df_kid_gene['count'].mul(df_kid_gene['Score_sum'])

In [None]:
# Ranking for upregulated genes: highest product score first.
df_kid_gene_up = df_kid_gene.sort_values('Product_score', ascending=False)

In [None]:
# Ranking for downregulated genes: lowest (most negative) product score first.
df_kid_gene_down = df_kid_gene.sort_values('Product_score')

In [None]:
# Keep one row per gene (the first occurrence in each ranking).
df_kid_gene_up = df_kid_gene_up.drop_duplicates('Gene')
df_kid_gene_down = df_kid_gene_down.drop_duplicates('Gene')

In [None]:
# Apply the score cut-off: >= 9 for the up list, <= -9 for the down list.
df_kid_gene_up = df_kid_gene_up.loc[df_kid_gene_up['Product_score'].ge(9)]
df_kid_gene_down = df_kid_gene_down.loc[df_kid_gene_down['Product_score'].le(-9)]

In [None]:
# Exclude genes whose product score is exactly 12 (up list) or -12 (down list).
df_kid_gene_up = df_kid_gene_up.loc[df_kid_gene_up['Product_score'].ne(12)]
df_kid_gene_down = df_kid_gene_down.loc[df_kid_gene_down['Product_score'].ne(-12)]

In [None]:
# Ranked gene names as plain Python lists.
kid_gene_up_list = list(df_kid_gene_up['Gene'])
kid_gene_down_list = list(df_kid_gene_down['Gene'])

In [None]:
# Distinct sample names in the combined table, in order of first appearance.
sample_list = list(df_all['Sample'].unique())

In [None]:
# Preferred sample order, grouped by name suffix:
# Phos, then Protein, then RNA, then Epi.
sample_order = (
    ['RR10_Phos']
    + [
        'BNL3_Protein',
        'NSRL22A_II_F_vs_I_F_Protein',
        'NSRL22A_II_M_vs_I_M_Protein',
        'RR10_Protein',
        'RR1_Protein',
        'RR3_Protein',
        'RR19_Protein',
        'Inspiration4_plasma_immpostVpre_Protein',
        'Inspiration4_exosomes_immpostVpre_Protein',
    ]
    + [
        'RR7_C3HHeJ_GCvsSF_25d_RNA',
        'RR7_C57BL6J_GCvsSF_25d_RNA',
        'RR10_RNA',
        'MHU3_RNA',
        'RR23_RNA',
        'RR1_RNA',
        'RR3_RNA',
        'RR7_C3HHeJ_GCvsSF_75d_RNA',
        'RR7_C57BL6J_GCvsSF_75d_RNA',
        'MHU1_MGvGC_RNA',
        'JAXA_preVflight1_RNA',
        'JAXA_preVflight4_RNA',
    ]
    + ['RR1_Epi', 'RR3_Epi']
)

In [None]:
# Drop entries of sample_order that do not occur in sample_list
# (the relative order of the remaining entries is unchanged).
samples_present = set(sample_list)
sample_order = [name for name in sample_order if name in samples_present]

In [None]:
# Cap the upregulated gene list at its first 200 entries.
max_genes_shown = 200
kid_gene_up_list_sub = kid_gene_up_list[:max_genes_shown]

In [None]:
# Restrict the combined table to the selected upregulated genes
# (kid_gene_up_list_sub holds at most 200 genes).
# .copy() makes df_all_sub an independent frame, so the column that is added
# to it further down does not trigger pandas' SettingWithCopyWarning.
df_all_sub = df_all[df_all['Gene'].isin(kid_gene_up_list_sub)].copy()

In [None]:
# Distinct sample names that still have rows after the gene filter,
# in order of first appearance.
sample_list = list(df_all_sub['Sample'].unique())

In [None]:
# Preferred sample order, grouped by name suffix:
# Phos, then Protein, then RNA, then Epi.
sample_order = (
    ['RR10_Phos']
    + [
        'BNL3_Protein',
        'NSRL22A_II_F_vs_I_F_Protein',
        'NSRL22A_II_M_vs_I_M_Protein',
        'RR10_Protein',
        'RR1_Protein',
        'RR3_Protein',
        'RR19_Protein',
        'Inspiration4_plasma_immpostVpre_Protein',
        'Inspiration4_exosomes_immpostVpre_Protein',
    ]
    + [
        'RR7_C3HHeJ_GCvsSF_25d_RNA',
        'RR7_C57BL6J_GCvsSF_25d_RNA',
        'RR10_RNA',
        'MHU3_RNA',
        'RR23_RNA',
        'RR1_RNA',
        'RR3_RNA',
        'RR7_C3HHeJ_GCvsSF_75d_RNA',
        'RR7_C57BL6J_GCvsSF_75d_RNA',
        'MHU1_MGvGC_RNA',
        'JAXA_preVflight1_RNA',
        'JAXA_preVflight4_RNA',
    ]
    + ['RR1_Epi', 'RR3_Epi']
)

In [None]:
# Drop entries of sample_order that do not occur in sample_list
# (the relative order of the remaining entries is unchanged).
samples_present = set(sample_list)
sample_order = [name for name in sample_order if name in samples_present]

In [None]:
# Reverse kid_gene_up_list_sub in place.
# NOTE: running this cell a second time restores the previous order.
kid_gene_up_list_sub[:] = kid_gene_up_list_sub[::-1]

In [None]:
# Load the table that maps sample names to display labels.
df_label = pd.read_csv(filepath_or_buffer='sample_label_mapping.csv')

In [None]:
# Keep only the label rows whose sample appears in sample_order.
has_plotted_sample = df_label['Sample'].isin(sample_order)
df_label = df_label.loc[has_plotted_sample]

In [None]:
# Build the tick labels in the same order as sample_order.
# Taking df_label['Label'] as-is would follow the CSV's row order, which is
# not guaranteed to match sample_order, so labels could end up under the
# wrong sample. A sample without a label row falls back to its own name,
# which keeps one label per plotted sample.
label_by_sample = dict(zip(df_label['Sample'], df_label['Label']))
label_list = [label_by_sample.get(name, name) for name in sample_order]

In [None]:
# Dictionary keyed by sample name; each value is the list of that sample's
# remaining column values from df_label.
labels_indexed_by_sample = df_label.set_index('Sample')
label_dict = labels_indexed_by_sample.T.to_dict(orient='list')

In [None]:
# Reassign the dataframe variable.
# NOTE: plain assignment binds a second name to the same DataFrame object;
# no copy is made here.
df_deg_sub = df_all_sub

In [None]:
# Rows of df_deg_sub whose gene is in the selected upregulated gene list.
is_selected_up_gene = df_deg_sub['Gene'].isin(kid_gene_up_list_sub)
df_deg_kid_up = df_deg_sub[is_selected_up_gene]

In [None]:
# Take the three columns needed for the fold-change track.
# .copy() makes df_fc an independent frame, so adding the 'Direction' column
# to it afterwards does not trigger pandas' SettingWithCopyWarning.
df_fc = df_deg_sub[['Sample', 'Gene', 'logFC']].copy()

In [None]:
# Classify every row by the sign of its logFC.
# A missing (NaN) logFC is labelled 'Unknown' rather than 'No Change': a
# missing value is not evidence of zero change, and fc_mapping already
# defines a colour for 'Unknown'.
logfc = df_deg_sub['logFC']
df_fc['Direction'] = np.select(
    [logfc > 0, logfc < 0, logfc == 0],
    ['Up', 'Down', 'No Change'],
    default='Unknown',
)

In [None]:
# Drop logFC now that the direction label has been derived from it.
df_fc = df_fc.loc[:, ['Sample', 'Gene', 'Direction']]

In [None]:
# Rename to the (sample, category, value) column names used for the comut input.
df_fc = df_fc.rename(columns={
    'Sample': 'sample',
    'Gene': 'category',
    'Direction': 'value',
})

In [None]:
# For every row, store how many rows in df_deg_sub share that row's gene.
rows_per_gene = df_deg_sub.groupby('Gene')['Gene'].transform('count')
df_deg_sub['Count'] = rows_per_gene

In [None]:
# Reload the per-sample metadata table from disk.
df_meta = pd.read_csv(filepath_or_buffer='sample_metadata.csv')

In [None]:
# Keep only the metadata rows for samples that will be plotted.
# .copy() makes the result an independent frame, so overwriting its 'Sample'
# column afterwards does not trigger pandas' SettingWithCopyWarning.
df_meta = df_meta[df_meta['Sample'].isin(sample_order)].copy()

In [None]:
# Convert 'Sample' to a categorical whose categories follow sample_order.
# NOTE: this only changes the column's dtype; the rows are not re-sorted.
sample_categorical = pd.Categorical(df_meta['Sample'], categories=sample_order)
df_meta['Sample'] = sample_categorical

In [None]:
# Long-format (sample, category, value) table holding each sample's Tissue.
df_tis = (
    df_meta[['Sample', 'Tissue']]
    .melt(id_vars=['Sample'], value_vars=['Tissue'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Exposure.
df_rad = (
    df_meta[['Sample', 'Exposure']]
    .melt(id_vars=['Sample'], value_vars=['Exposure'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Species.
df_spe = (
    df_meta[['Sample', 'Species']]
    .melt(id_vars=['Sample'], value_vars=['Species'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Omic type.
df_dat = (
    df_meta[['Sample', 'Omic']]
    .melt(id_vars=['Sample'], value_vars=['Omic'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Colour palettes taken from palettable.
# cartocolors palettes
carto_qualitative = palettable.cartocolors.qualitative
vivid_10 = carto_qualitative.Vivid_10.mpl_colors
safe_10 = carto_qualitative.Safe_10.mpl_colors
purp_7 = palettable.cartocolors.sequential.Purp_7.mpl_colormap
# cmocean, tableau and colorbrewer palettes
balance_6 = palettable.cmocean.diverging.Balance_6.mpl_colors
tab20 = palettable.tableau.Tableau_20.mpl_colors
color_list = palettable.colorbrewer.qualitative.Set1_9.mpl_colors

In [None]:
# Matplotlib defaults for the figures: Arial at 12 pt for general text,
# 18 pt for axis labels, legend text and tick labels.
custom_rcParams = {'font.family': 'Arial', 'font.size': 12}
custom_rcParams.update(dict.fromkeys(
    ('axes.labelsize', 'legend.fontsize', 'ytick.labelsize', 'xtick.labelsize'),
    18,
))

rcParams.update(custom_rcParams)

In [None]:
# Value -> colour lookups, one per plotted track.

# fold-change direction
fc_mapping = {
    'Up': 'Green',
    'Unknown': 'lightgrey',
    'Down': 'Red',
    'No Change': 'Black',
}

# tissue
tis_mapping = {
    'Kidney': tab20[0],
    'Plasma': tab20[2],
    'Exosome': tab20[5],
}

# exposure
rad_mapping = {
    'simGCRsim': tab20[6],
    'GCRsim': tab20[11],
    'Spaceflight': tab20[19],
}

# species
spe_mapping = {
    'Mouse': tab20[8],
    'Human': tab20[9],
}

# omic type
dat_mapping = {
    'Proteome': safe_10[1],
    'Transcriptome': safe_10[2],
    'PTM (Phospho)': safe_10[6],
    'Epigenome': safe_10[4],
}

# side-bar settings
side_mapping = {'frequency': 'lightgrey'}
side_kwargs = {'height': 0.8}

# gene (row) order for the upregulated heatmap
category_order = kid_gene_up_list_sub

## **HIGHEST RANKED UPREGULATED GENES FROM KIDNEY TRANSCRIPTOMIC AND PROTEOMIC DATASETS**

## Categorical heatmap

In [None]:
# Build and draw the CoMut figure for the upregulated genes.
deg_comut = comut.CoMut()
deg_comut.samples = sample_order

# Fold-change direction per gene and sample, with the gene order fixed.
deg_comut.add_categorical_data(
    df_fc,
    mapping=fc_mapping,
    category_order=category_order,
    name='Log2 Fold Change Direction',
)

# Per-sample annotation tracks, added in this order.
for track_data, track_colors, track_name in (
    (df_tis, tis_mapping, 'Tissue'),
    (df_rad, rad_mapping, 'Exposure'),
    (df_spe, spe_mapping, 'Species'),
    (df_dat, dat_mapping, 'Omic'),
):
    deg_comut.add_categorical_data(track_data, mapping=track_colors, name=track_name)

deg_comut.plot_comut(
    figsize=(17, 40),
    x_padding=0.02,
    y_padding=0.02,
    hspace=0.01,
    wspace=0.01,
    widths=(5, 0.5),
)

# Replace the x tick labels on the 'Omic' axes with the display labels.
deg_comut.axes['Omic'].set_xticklabels(label_list)

deg_comut.add_unified_legend(bbox_to_anchor=(1.1, 1.0), frameon=True)

----------------

### Now we will repeat this for the downregulated genes.

In [None]:
# Distinct sample names in the combined table, in order of first appearance.
sample_list = list(df_all['Sample'].unique())

In [None]:
# Preferred sample order, grouped by name suffix:
# Phos, then Protein, then RNA, then Epi.
sample_order = (
    ['RR10_Phos']
    + [
        'BNL3_Protein',
        'NSRL22A_II_F_vs_I_F_Protein',
        'NSRL22A_II_M_vs_I_M_Protein',
        'RR10_Protein',
        'RR1_Protein',
        'RR3_Protein',
        'RR19_Protein',
        'Inspiration4_plasma_immpostVpre_Protein',
        'Inspiration4_exosomes_immpostVpre_Protein',
    ]
    + [
        'RR7_C3HHeJ_GCvsSF_25d_RNA',
        'RR7_C57BL6J_GCvsSF_25d_RNA',
        'RR10_RNA',
        'MHU3_RNA',
        'RR23_RNA',
        'RR1_RNA',
        'RR3_RNA',
        'RR7_C3HHeJ_GCvsSF_75d_RNA',
        'RR7_C57BL6J_GCvsSF_75d_RNA',
        'MHU1_MGvGC_RNA',
        'JAXA_preVflight1_RNA',
        'JAXA_preVflight4_RNA',
    ]
    + ['RR1_Epi', 'RR3_Epi']
)

In [None]:
# Drop entries of sample_order that do not occur in sample_list
# (the relative order of the remaining entries is unchanged).
samples_present = set(sample_list)
sample_order = [name for name in sample_order if name in samples_present]

In [None]:
# Cap the downregulated gene list at its first 200 entries.
max_genes_shown = 200
kid_gene_down_list_sub = kid_gene_down_list[:max_genes_shown]

In [None]:
# Restrict the combined table to the selected downregulated genes
# (kid_gene_down_list_sub holds at most 200 genes).
# .copy() makes df_all_sub an independent frame, so the column that is added
# to it further down does not trigger pandas' SettingWithCopyWarning.
df_all_sub = df_all[df_all['Gene'].isin(kid_gene_down_list_sub)].copy()

In [None]:
# Distinct sample names that still have rows after the gene filter,
# in order of first appearance.
sample_list = list(df_all_sub['Sample'].unique())

In [None]:
# Preferred sample order, grouped by name suffix:
# Phos, then Protein, then RNA, then Epi.
sample_order = (
    ['RR10_Phos']
    + [
        'BNL3_Protein',
        'NSRL22A_II_F_vs_I_F_Protein',
        'NSRL22A_II_M_vs_I_M_Protein',
        'RR10_Protein',
        'RR1_Protein',
        'RR3_Protein',
        'RR19_Protein',
        'Inspiration4_plasma_immpostVpre_Protein',
        'Inspiration4_exosomes_immpostVpre_Protein',
    ]
    + [
        'RR7_C3HHeJ_GCvsSF_25d_RNA',
        'RR7_C57BL6J_GCvsSF_25d_RNA',
        'RR10_RNA',
        'MHU3_RNA',
        'RR23_RNA',
        'RR1_RNA',
        'RR3_RNA',
        'RR7_C3HHeJ_GCvsSF_75d_RNA',
        'RR7_C57BL6J_GCvsSF_75d_RNA',
        'MHU1_MGvGC_RNA',
        'JAXA_preVflight1_RNA',
        'JAXA_preVflight4_RNA',
    ]
    + ['RR1_Epi', 'RR3_Epi']
)

In [None]:
# Drop entries of sample_order that do not occur in sample_list
# (the relative order of the remaining entries is unchanged).
samples_present = set(sample_list)
sample_order = [name for name in sample_order if name in samples_present]

In [None]:
# Reverse kid_gene_down_list_sub in place.
# NOTE: running this cell a second time restores the previous order.
kid_gene_down_list_sub[:] = kid_gene_down_list_sub[::-1]

In [None]:
# Load the table that maps sample names to display labels.
df_label = pd.read_csv(filepath_or_buffer='sample_label_mapping.csv')

In [None]:
# Keep only the label rows whose sample appears in sample_order.
has_plotted_sample = df_label['Sample'].isin(sample_order)
df_label = df_label.loc[has_plotted_sample]

In [None]:
# Build the tick labels in the same order as sample_order.
# Taking df_label['Label'] as-is would follow the CSV's row order, which is
# not guaranteed to match sample_order, so labels could end up under the
# wrong sample. A sample without a label row falls back to its own name,
# which keeps one label per plotted sample.
label_by_sample = dict(zip(df_label['Sample'], df_label['Label']))
label_list = [label_by_sample.get(name, name) for name in sample_order]

In [None]:
# Dictionary keyed by sample name; each value is the list of that sample's
# remaining column values from df_label.
labels_indexed_by_sample = df_label.set_index('Sample')
label_dict = labels_indexed_by_sample.T.to_dict(orient='list')

In [None]:
# Reassign the dataframe variable.
# NOTE: plain assignment binds a second name to the same DataFrame object;
# no copy is made here.
df_deg_sub = df_all_sub

In [None]:
# Take the three columns needed for the fold-change track.
# .copy() makes df_fc an independent frame, so adding the 'Direction' column
# to it afterwards does not trigger pandas' SettingWithCopyWarning.
df_fc = df_deg_sub[['Sample', 'Gene', 'logFC']].copy()

In [None]:
# Classify every row by the sign of its logFC.
# A missing (NaN) logFC is labelled 'Unknown' rather than 'No Change': a
# missing value is not evidence of zero change, and fc_mapping already
# defines a colour for 'Unknown'.
logfc = df_deg_sub['logFC']
df_fc['Direction'] = np.select(
    [logfc > 0, logfc < 0, logfc == 0],
    ['Up', 'Down', 'No Change'],
    default='Unknown',
)

In [None]:
# Drop logFC now that the direction label has been derived from it.
df_fc = df_fc.loc[:, ['Sample', 'Gene', 'Direction']]

In [None]:
# Rename to the (sample, category, value) column names used for the comut input.
df_fc = df_fc.rename(columns={
    'Sample': 'sample',
    'Gene': 'category',
    'Direction': 'value',
})

In [None]:
# For every row, store how many rows in df_deg_sub share that row's gene.
rows_per_gene = df_deg_sub.groupby('Gene')['Gene'].transform('count')
df_deg_sub['Count'] = rows_per_gene

In [None]:
# Reload the per-sample metadata table from disk.
df_meta = pd.read_csv(filepath_or_buffer='sample_metadata.csv')

In [None]:
# Keep only the metadata rows for samples that will be plotted.
# .copy() makes the result an independent frame, so overwriting its 'Sample'
# column afterwards does not trigger pandas' SettingWithCopyWarning.
df_meta = df_meta[df_meta['Sample'].isin(sample_order)].copy()

In [None]:
# Convert 'Sample' to a categorical whose categories follow sample_order.
# NOTE: this only changes the column's dtype; the rows are not re-sorted.
sample_categorical = pd.Categorical(df_meta['Sample'], categories=sample_order)
df_meta['Sample'] = sample_categorical

In [None]:
# Long-format (sample, category, value) table holding each sample's Tissue.
df_tis = (
    df_meta[['Sample', 'Tissue']]
    .melt(id_vars=['Sample'], value_vars=['Tissue'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Exposure.
df_rad = (
    df_meta[['Sample', 'Exposure']]
    .melt(id_vars=['Sample'], value_vars=['Exposure'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Species.
df_spe = (
    df_meta[['Sample', 'Species']]
    .melt(id_vars=['Sample'], value_vars=['Species'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Long-format (sample, category, value) table holding each sample's Omic type.
df_dat = (
    df_meta[['Sample', 'Omic']]
    .melt(id_vars=['Sample'], value_vars=['Omic'], var_name='category')
    .rename(columns={'Sample': 'sample'})
)

In [None]:
# Value -> colour lookups, one per plotted track.

# fold-change direction
fc_mapping = {
    'Up': 'Green',
    'Unknown': 'lightgrey',
    'Down': 'Red',
    'No Change': 'Black',
}

# tissue
tis_mapping = {
    'Kidney': tab20[0],
    'Plasma': tab20[2],
    'Exosome': tab20[5],
}

# exposure
rad_mapping = {
    'simGCRsim': tab20[6],
    'GCRsim': tab20[11],
    'Spaceflight': tab20[19],
}

# species
spe_mapping = {
    'Mouse': tab20[8],
    'Human': tab20[9],
}

# omic type
dat_mapping = {
    'Proteome': safe_10[1],
    'Transcriptome': safe_10[2],
    'PTM (Phospho)': safe_10[6],
    'Epigenome': safe_10[4],
}

# side-bar settings
side_mapping = {'frequency': 'lightgrey'}
side_kwargs = {'height': 0.8}

# gene (row) order for the downregulated heatmap
category_order = kid_gene_down_list_sub

## **HIGHEST RANKED DOWNREGULATED GENES FROM KIDNEY TRANSCRIPTOMIC AND PROTEOMIC DATASETS**

## Categorical heatmap

In [None]:
# Build and draw the CoMut figure for the downregulated genes.
deg_comut = comut.CoMut()
deg_comut.samples = sample_order

# Fold-change direction per gene and sample, with the gene order fixed.
deg_comut.add_categorical_data(
    df_fc,
    mapping=fc_mapping,
    category_order=category_order,
    name='Log2 Fold Change Direction',
)

# Per-sample annotation tracks, added in this order.
for track_data, track_colors, track_name in (
    (df_tis, tis_mapping, 'Tissue'),
    (df_rad, rad_mapping, 'Exposure'),
    (df_spe, spe_mapping, 'Species'),
    (df_dat, dat_mapping, 'Omic'),
):
    deg_comut.add_categorical_data(track_data, mapping=track_colors, name=track_name)

deg_comut.plot_comut(
    figsize=(17, 60),
    x_padding=0.02,
    y_padding=0.02,
    hspace=0.01,
    wspace=0.01,
    widths=(5, 0.5),
)

# Replace the x tick labels on the 'Omic' axes with the display labels.
deg_comut.axes['Omic'].set_xticklabels(label_list)

deg_comut.add_unified_legend(bbox_to_anchor=(1.1, 1.0), frameon=True)