In [None]:
## Load packages
import pandas as pd
import numpy as np
import palettable
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import rcParams
from comut import comut

In [None]:
#read in data
diff_df = pd.read_csv("difference_input.csv")
stat_df = pd.read_csv("stat_input.csv")

In [None]:
#melt the data
diff_df_melt = pd.melt(diff_df, id_vars=['feature'], var_name='Label', value_name='Difference')
stat_df_melt = pd.melt(stat_df, id_vars=['feature'], var_name='Label', value_name='P_Value')

In [None]:
#build the table of significant features in ONE step.
#Everything below is derived from diff_df_melt / stat_df_melt, so re-running this
#cell always gives the same result (previously the "multiply by -1" cell flipped
#the sign again every time it was re-run).

#the p-values are attached to the differences purely by row position, so the
#feature rows of both melted tables must line up; fail loudly instead of silently
#pairing a P_Value with the wrong Difference
#(the 'Label' headers are not compared here - TODO confirm both files use the same headers)
if not diff_df_melt['feature'].equals(stat_df_melt['feature']):
    raise ValueError(
        "difference_input.csv and stat_input.csv do not list the same features "
        "in the same order; the P_Value column cannot be attached by position"
    )

df = (
    #concatenate the dataframes
    pd.concat([diff_df_melt, stat_df_melt['P_Value']], axis=1)
    #fill the empty cells ('-') with NaN
    .replace('-', np.nan)
    #convert the P_Value and Difference columns to numeric values
    .assign(
        P_Value=lambda t: pd.to_numeric(t['P_Value']),
        Difference=lambda t: pd.to_numeric(t['Difference']),
    )
    #keep only the rows with a p value < 0.05
    .loc[lambda t: t['P_Value'] < 0.05]
    #multiply the difference by -1
    .assign(Difference=lambda t: t['Difference'] * -1)
    #positive difference -> 'Up', negative -> 'Down'; zero or missing stays NaN
    .assign(Direction=lambda t: np.sign(t['Difference']).map({1: 'Up', -1: 'Down'}))
    #count the number of times a feature is observed across the samples
    .assign(Count=lambda t: t.groupby('feature')['feature'].transform('count'))
    #sort by the count column
    .sort_values(by=['Count'], ascending=False)
)

In [None]:
#sample names in the order used for plotting and for filtering the metadata
sample_order = [
    'STS-135',
    'RR6[R+4]',
    'RR9',
    'RR23_Met',
    'RR1_Met',
    'RR6[L+56]',
]

In [None]:
#read in relabeled metadata file
df_label = pd.read_csv('sample_label_mapping.csv')

In [None]:
#remove the samples that are not in the sample list
df_label = df_label[df_label['Sample'].isin(sample_order)]

In [None]:
#turn the label column into a list
label_list = df_label['Label'].tolist()

In [None]:
#assign the dataframe to a new variable
sig_df = df

In [None]:
#store the order of the features into a list
feature_order = sig_df['feature'].tolist()

In [None]:
#remove duplicate category names
feature_order = list(dict.fromkeys(feature_order))

In [None]:
#reverse the order of the category names
feature_order.reverse()

In [None]:
#take the three columns needed for the comut plot and rename them to the
#sample / category / value layout used by the comut package
df_fc = sig_df[['Label', 'feature', 'Direction']].rename(
    columns={'Label': 'sample', 'feature': 'category', 'Direction': 'value'}
)

In [None]:
#read in metadata file
df_meta = pd.read_csv('sample_metadata.csv')

In [None]:
#keep only the samples that are in the sample list
df_meta = df_meta[df_meta['Label'].isin(sample_order)]

In [None]:
#order the Labels in the metadata file
df_meta['Label'] = pd.Categorical(df_meta['Label'], sample_order)

In [None]:
#make a new dataframe with the sample names and tissue types
df_tis = df_meta[['Label', 'Sample']]
df_tis = df_tis.melt(id_vars=['Label'], value_vars=['Sample'])
df_tis.columns = ['sample', 'category', 'value']

In [None]:
#make a new dataframe with the sample names and radiation type
df_rad = df_meta[['Label', 'Exposure']]
df_rad = df_rad.melt(id_vars=['Label'], value_vars=['Exposure'])
df_rad.columns = ['sample', 'category', 'value']

In [None]:
#make a new dataframe with the sample names and species
df_spe = df_meta[['Label', 'Species']]
df_spe = df_spe.melt(id_vars=['Label'], value_vars=['Species'])
df_spe.columns = ['sample', 'category', 'value']

In [None]:
#make a new dataframe with the sample names and Omics
df_dat = df_meta[['Label', 'Omic']]
df_dat = df_dat.melt(id_vars=['Label'], value_vars=['Omic'])
df_dat.columns = ['sample', 'category', 'value']

In [None]:
#loading in color palettes (each palette is loaded once; vivid_10 used to be
#assigned twice with the same value)
#NOTE(review): only tab20 and safe_10 are referenced by the mapping cell below;
#the others may be used elsewhere in the notebook - confirm before removing
vivid_10 = palettable.cartocolors.qualitative.Vivid_10.mpl_colors
balance_6 = palettable.cmocean.diverging.Balance_6.mpl_colors
safe_10 = palettable.cartocolors.qualitative.Safe_10.mpl_colors
purp_7 = palettable.cartocolors.sequential.Purp_7.mpl_colormap
tab20 = palettable.tableau.Tableau_20.mpl_colors
color_list = palettable.colorbrewer.qualitative.Set1_9.mpl_colors

#setting aesthetic parameters: base font plus larger label, legend and tick text
custom_rcParams = {
    'font.family': 'Arial',
    'font.size': 12,
    'axes.labelsize': 18,
    'legend.fontsize': 18,
    'ytick.labelsize': 18,
    'xtick.labelsize': 18,
}

#apply the settings globally, so they affect every figure drawn after this cell
rcParams.update(custom_rcParams)

In [None]:
#colour for each value of the 'Direction' data
fc_mapping = {
    'Up': 'Green',
    'Unknown': 'lightgrey',
    'Down': 'Red',
    'No Change': 'Black',
}

#one colour per value of each metadata annotation
tis_mapping = dict(Faeces=tab20[17])
rad_mapping = dict(Spaceflight=tab20[19])
spe_mapping = dict(Mouse=tab20[8])
dat_mapping = dict(Metagenome=safe_10[9])

#side bar colour and bar height
side_mapping = dict(frequency='lightgrey')
side_kwargs = dict(height=0.8)

#the categories are plotted in the feature order computed earlier
category_order = feature_order

## **All significant features shown**

## Categorical heatmap

In [None]:
#create the comut object and fix the order of the samples
deg_comut = comut.CoMut()
deg_comut.samples = sample_order

#Up / Down direction of every significant feature, categories in category_order
deg_comut.add_categorical_data(
    df_fc,
    mapping=fc_mapping,
    category_order=category_order,
    name='Direction',
)

#one annotation dataset per metadata column
deg_comut.add_categorical_data(df_tis, mapping=tis_mapping, name='Sample')
deg_comut.add_categorical_data(df_rad, mapping=rad_mapping, name='Exposure')
deg_comut.add_categorical_data(df_spe, mapping=spe_mapping, name='Species')
deg_comut.add_categorical_data(df_dat, mapping=dat_mapping, name='Omic')

#draw the figure
deg_comut.plot_comut(
    figsize=(6, 10),
    x_padding=0.02,
    y_padding=0.02,
    hspace=0.01,
    wspace=0.01,
    widths=(5, 0.5),
)

#replace the x tick labels with the relabeled names; label_list is applied by position
deg_comut.axes['Omic'].set_xticklabels(label_list)

#single legend for all datasets, anchored at (1.2, 1.0) with a frame
deg_comut.add_unified_legend(bbox_to_anchor=(1.2, 1.0), frameon=True)