In [7]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import spearmanr, pearsonr
import statsmodels.api



In [8]:

def correlations(df):
    corrs, pvalues = spearmanr(df.values)
    corrs_fea , pvalues_fea = pd.DataFrame(corrs[0,1:]).T , pd.DataFrame(pvalues[0,1:]).T
    corrs_fea.columns, corrs_fea.index = df_merged.columns[1:] , [df2.columns[0]]
    pvalues_fea.columns, pvalues_fea.index = df_merged.columns[1:] , [df2.columns[0]]

    return corrs_fea,pvalues_fea

def correlations_multifactors(df1,df2,df_merged):
    corrs, pvalues = spearmanr(df_merged.values)
    corrs_fea , pvalues_fea = pd.DataFrame(corrs[:len(df2.columns),len(df2.columns):]) , pd.DataFrame(pvalues[:len(df2.columns),len(df2.columns):])
    corrs_fea.columns, corrs_fea.index = df_merged.columns[len(df2.columns):] , df_merged.columns[:len(df2.columns)]
    pvalues_fea.columns, pvalues_fea.index = df_merged.columns[len(df2.columns):] , df_merged.columns[:len(df2.columns)]

    return corrs_fea,pvalues_fea

def heatmap(df,pvals,conf,figtitle=f"Correlations Heatmap"):
    cmap = sns.diverging_palette(40,140,s=100,sep=5, as_cmap=True) # https://seaborn.pydata.org/generated/seaborn.diverging_palette.html
    # cmap = sns.color_palette("Blues", as_cmap=True)

    for col in pvals.columns:
        pvals[col] = pvals[col].apply(lambda x : '*' if x <= conf['alpha'] else ' ' )
    sns.heatmap(df,cmap=cmap,square=True,
                annot = pvals.to_numpy(), fmt='',
                xticklabels=True, yticklabels=1,annot_kws={"size": conf['fontsize'] + 3} )

    plt.title(figtitle,fontsize = conf['fontsize'])
    plt.xticks(fontsize = conf['fontsize'])
    plt.yticks(fontsize = conf['fontsize'])

    plt.savefig(f"{conf['savename']}",bbox_inches='tight',facecolor='white',dpi = 500)




In [9]:
conf ={
    'data1':'data1.xlsx', ## samples are rows  and columns are features
    'data2':'data2.xlsx', ## one columns table, samples are rows 
    'savename':'heatmap_1.png',
    'sampletype':{'Healthy': 'B', 'CVD': 'A'},
    'samples':['Healthy','CVD','all'][0],
    'alpha':0.05,
    'correction_method' : 'fdr_bh', ## or 'bonferroni',
    'fontsize' : 10,
    'option':['ONE','TWO'][1] #    ## ONE (correction is done for eaach column separately), TWO (correction is done for all columns together)
}

In [10]:
# load data
df1 = pd.read_excel(conf['data1'],index_col =0, header= 0 )
df2 = pd.read_excel(conf['data2'],index_col =0, header= 0 )

# separating CVD/Healthy samples
df1.drop(index=[x for x in df1.index if x[-1]!=conf['sampletype'][conf['samples']]], inplace = True)
df2.drop(index=[x for x in df2.index if x[-1]!=conf['sampletype'][conf['samples']]], inplace = True)

In [11]:
print((df1.shape))
df1
res = correlations_multifactors(df1,df2,df_merged)

(31, 11)


NameError: name 'df_merged' is not defined

In [None]:
# merge two tables
df_merged = pd.merge(pd.DataFrame(df2),df1,left_index=True,right_index = True) #first df2 then df1

# calculate correlations and raw pvalues
# corrs_fea , pvalues_fea = correlations(df_merged)
corrs_fea , pvalues_fea = correlations_multifactors(df1,df2,df_merged)
# # pvalue correction
# FDR_res= statsmodels.stats.multitest.multipletests(pvalues_fea.values[0], alpha=0.05, method = conf['correction_method'], is_sorted=False, returnsorted=False)# bonferroni
# corrected_pvalues_fea = pvalues_fea.copy()
# corrected_pvalues_fea.loc[df2.columns[0]] = FDR_res[1]

# pvalue correction Choose option ONE or TWO

if conf['option'] == 'ONE':
    corrected_pvalues_fea = pvalues_fea.copy()

    ## ONE (correction is done for eaach column separately)
    for i in range(len(df2.columns)):
        FDR_res= statsmodels.stats.multitest.multipletests(pvalues_fea.values[i], alpha=0.05, method = conf['correction_method'], is_sorted=False, returnsorted=False)# bonferroni
        corrected_pvalues_fea.loc[df2.columns[i]] = FDR_res[1]
elif conf['option'] == 'TWO':
    corrected_pvalues_fea = pvalues_fea.copy()


    FDR_res= statsmodels.stats.multitest.multipletests([j for i in range(len(pvalues_fea)) for j in pvalues_fea.values[i] ], alpha=0.05, method = conf['correction_method'], is_sorted=False, returnsorted=False)# bonferroni
    res = FDR_res[1].reshape(len(df2.columns),len(df1.columns))
    for i in range(len(df2.columns)):
        corrected_pvalues_fea.loc[df2.columns[i]] = res[i]

# create the heatmap
heatmap(corrs_fea.T,corrected_pvalues_fea.copy().T,conf)

In [6]:
corrs_fea


NameError: name 'corrs_fea' is not defined

In [22]:
df_merged

Unnamed: 0,11100_B,20000,21000,21010_B,21010_C,22001_A,23000,31010_A,31010_B,32001_B,...,gOscillospira,gAkkermansia,fRuminococcaceae,fChristensenellaceaeg,fLachnospiraceaeg,fEnterobacteriaceae,gBacteroides,gRuminococcus,gClostridium,Conversion rate
P001B,0.000207,0.042973,0.029336,0.123256,0.014146,0.000207,0.006582,0.037922,0.000207,0.001619,...,0.008462,0.002521,0.012032,0.000293,0.015901,0.001236,0.174693,0.022861,0.005717,High
P002B,0.000207,0.080632,0.022718,0.082556,0.000207,0.000207,0.009661,0.02718,0.042817,0.000207,...,0.024967,0.001629,0.002626,0.000412,0.014894,0.0,0.101612,0.038768,0.002204,High
P003B,0.002343,0.062257,0.028294,0.064135,0.036739,0.002606,0.006281,0.029837,0.032245,0.000207,...,0.010618,0.000288,0.006931,0.000221,0.006643,0.0,0.176456,0.017594,0.012445,Low
P004B,0.008423,0.023732,0.019837,0.146072,0.015773,0.000207,0.09935,0.016455,0.000207,0.000207,...,0.00482,2.3e-05,0.003213,7.6e-05,0.026828,0.021811,0.271224,0.002937,0.021705,Non
P005B,0.000207,0.062443,0.042722,0.000207,0.049594,0.008844,0.000207,0.045668,0.000207,0.004156,...,0.00348,7.6e-05,0.002221,8.5e-05,0.029329,0.000375,0.151217,0.002151,0.0247,High
P006B,0.004091,0.13758,0.000207,0.000207,0.000207,0.008535,0.000207,0.064544,0.049778,0.003157,...,0.013376,6.2e-05,0.006912,0.000725,0.018161,0.010887,0.423606,0.008282,0.014496,Non
P007B,0.000514,0.000207,0.033128,0.000207,0.033954,0.001289,0.000207,0.000207,0.011477,0.00067,...,0.035768,0.010816,0.005501,0.001755,0.015697,0.000118,0.112555,0.057204,0.01606,High
P008B,0.000207,0.074485,0.021176,0.137031,0.027236,0.005709,0.000579,0.026116,0.000207,0.001236,...,0.032487,0.003762,0.015219,0.003887,0.015845,0.000924,0.126942,0.021516,0.005749,High
P009B,0.002495,0.077311,0.002652,0.116035,0.002277,0.006046,0.000207,0.033495,0.000207,0.001542,...,0.000577,4.4e-05,0.002967,0.0,0.074726,0.001324,0.234034,0.000157,0.016319,Low
P010B,0.000207,0.025096,0.013458,0.120429,0.013123,0.002818,0.001215,0.022925,0.000207,0.000744,...,0.023195,0.0,0.016422,0.001554,0.011313,0.0,0.039214,0.009391,0.003527,High


In [19]:
corrected_pvalues_fea

Unnamed: 0,Lithocholic acid 3-SO4,Isolithocholic acid 3-SO4,Deoxycholic acid 3-SO4,Chenodeoxycholic acid 3-SO4,Hyodeoxycholic acid 3-SO4,Ursodeoxycholic acid 3-SO4,Allocholic acid 3-SO4,Cholic acid 3-SO4,Glycolithocholic acid 3-SO4,Glycoursodeoxycholic acid 3-SO4
Conversion Rate,0.000483,6e-06,0.005392,0.000483,0.000483,0.056321,0.002159,0.085704,0.130545,0.000741


In [20]:
df_merged

Unnamed: 0,Conversion Rate,Lithocholic acid 3-SO4,Isolithocholic acid 3-SO4,Deoxycholic acid 3-SO4,Chenodeoxycholic acid 3-SO4,Hyodeoxycholic acid 3-SO4,Ursodeoxycholic acid 3-SO4,Allocholic acid 3-SO4,Cholic acid 3-SO4,Glycolithocholic acid 3-SO4,Glycoursodeoxycholic acid 3-SO4
P001B,58.857143,0.000855,0.001931,0.000195,3.3e-05,7.7e-05,1.375523e-05,0.001422,9e-06,0.000198,4.4e-05
P002B,42.250531,0.000579,1.6e-05,0.00022,1.4e-05,2.9e-05,2.31949e-07,4.8e-05,6.6e-05,0.000165,2.6e-05
P031B,25.233645,0.000519,4.9e-05,0.000142,3.9e-05,0.000176,2.347436e-06,6.4e-05,2.8e-05,0.000158,8e-06
P030B,7.55608,0.018511,0.000847,0.013459,0.010485,0.00833,7.393716e-06,0.002783,0.006293,0.000624,3.1e-05
P015B,4.692082,0.001131,0.000148,0.000346,0.000111,0.000186,1.923152e-06,0.000168,1.7e-05,0.000378,1.3e-05
P003B,15.909091,0.004606,0.001046,0.047455,0.003589,2.7e-05,0.0005503331,0.014419,0.014423,0.000464,4.8e-05
P004B,3.587444,0.024715,0.000266,0.014914,0.010845,0.001565,1.806688e-06,0.001175,0.014082,0.000834,2.5e-05
P005B,59.673913,0.000412,0.001081,0.000313,6.8e-05,0.01399,5.784992e-06,0.021908,0.000347,0.000426,1.7e-05
P006B,4.657933,0.004465,0.000592,0.017528,0.009023,0.004731,1.894856e-05,0.008567,0.00438,2.1e-05,1.7e-05
P007B,84.464555,0.000184,9e-06,7.2e-05,4.8e-05,2.4e-05,9.998871e-08,6e-06,7e-05,7.4e-05,4e-06
