# Correlations between eco-group response (averaged absolute abundance) and their baseline abundances

In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr, pearsonr
from statsmodels.stats.multitest import multipletests
from scipy.integrate import simps

# Read sample metadata and keep only the samples from the inulin diet arm.
df_meta = pd.read_csv('../../our_data/meta_data.csv', index_col=0)
df_meta = df_meta[df_meta.Diet == 'Inulin']

# Read bacterial absolute abundance (species-level 16S table).
df_16S_absab = pd.read_csv('../../our_data/16S_absolute_abundance_species.csv', index_col=0)
# Total load is summed over every column of the table, so it must be computed
# before the table is cut down to the taxa of interest below.
df_16S_absab['TotalLoad'] = df_16S_absab.sum(axis=1)

# Calculate group-level abundance by summing over individual group members.
inulin_degrader_group = [
    'Bacteroides-acidifaciens',
    'Muribaculaceae',
    'Faecalibaculum',
    'Parasutterella',
    'Bacteroides',
]
inulin_degrader_subgroup = ['Bacteroides-acidifaciens', 'Muribaculaceae']
generic_responder_subgroup = ['Akkermansia-muciniphila', 'Bacteroides-uniformis']

# Take an explicit copy of the column subset: the two group columns are added
# to it right after, and assigning into a bare slice raises pandas'
# SettingWithCopyWarning.
df_16S_absab = df_16S_absab[
    inulin_degrader_group + generic_responder_subgroup + ['TotalLoad']
].copy()
df_16S_absab['Inulin_degrader_group'] = df_16S_absab[inulin_degrader_group].sum(axis=1)
df_16S_absab['Generic_responder_subgroup'] = df_16S_absab[generic_responder_subgroup].sum(axis=1)

# Join metadata and abundances on their shared index; the inner join keeps only
# index entries present in both tables.
df = pd.merge(df_meta, df_16S_absab, left_index=True, right_index=True, how='inner')

# Quantify the averaged response of each taxon as the time-normalised area
# under its abundance curve (per mouse), then correlate that response with the
# same taxon's Day-0 (baseline) abundance across mice.
lines = []

# The per-mouse, time-sorted sub-tables do not depend on the taxon, so build
# them once. groupby also gives a deterministic (sorted) mouse order, unlike
# iterating over a set of IDs.
mouse_timecourses = [
    (mice, df_mouse.sort_values(by='Day')) for mice, df_mouse in df.groupby('MiceID')
]

#for taxa in inulin_degrader_group+generic_responder_subgroup+['Inulin_degrader_group','Generic_responder_subgroup']:
for taxa in inulin_degrader_subgroup + generic_responder_subgroup:
    baseline = []
    response_ave = []
    for mice, df_tmp in mouse_timecourses:
        day0 = df_tmp.loc[df_tmp.Day == 0, taxa]
        if day0.empty:
            # Fail with a message that names the offending mouse instead of an
            # opaque IndexError from `.values[0]`.
            raise ValueError(
                'Mouse {!r} has no Day 0 sample; cannot determine the baseline of {}'.format(mice, taxa)
            )
        x = list(df_tmp.Day)
        y = list(df_tmp[taxa])
        baseline.append(day0.values[0])
        # Simpson's-rule AUC divided by the last sampled day. This equals the
        # time-averaged abundance only when the time course starts at Day 0.
        # `x` is passed by keyword because scipy.integrate.simpson (which
        # `simps` aliases) does not accept it positionally in newer SciPy.
        response_ave.append(simps(y, x=x) / np.max(x))
    pearson_coef, pearson_P = pearsonr(baseline, response_ave)
    spearman_coef, spearman_P = spearmanr(baseline, response_ave)
    # Response taxon and baseline taxon are the same in this analysis.
    lines.append([taxa, taxa, pearson_coef, pearson_P, spearman_coef, spearman_P])
    
# # add association between Akk (response) and B. acidifaciens (baseline)
# baseline = []
# response_ave = []
# for mice in set(df.MiceID):
#     df_tmp = df[df.MiceID==mice].sort_values(by='Day')
#     vendor = list(set(df_tmp.Vendor))[0]
#     x = list(df_tmp.Day)
#     y = list(df_tmp['Akkermansia-muciniphila'])
#     baseline.append(df_tmp.loc[df_tmp.Day==0,'Bacteroides-acidifaciens'].values[0])
#     response_ave.append(simps(y,x)/np.max(x))
# pearson_coef, pearson_P = pearsonr(baseline, response_ave)
# spearman_coef, spearman_P = spearmanr(baseline, response_ave)
# lines.append(['Akkermansia-muciniphila', 'Bacteroides-acidifaciens', pearson_coef, pearson_P, spearman_coef, spearman_P])

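# # add association between total load (response) and degraders (baseline)
# # NOTE: if this is re-enabled, move the two list resets below inside the `for taxa` loop;
# # as written, baseline/response_ave keep accumulating across taxa.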
# baseline = []
# response_ave = []
# for taxa in inulin_degrader_group+['Inulin_degrader_group']:
#     for mice in set(df.MiceID):
#         df_tmp = df[df.MiceID==mice].sort_values(by='Day')
#         vendor = list(set(df_tmp.Vendor))[0]
#         x = list(df_tmp.Day)
#         y = list(df_tmp['TotalLoad'])
#         baseline.append(df_tmp.loc[df_tmp.Day==0,taxa].values[0])
#         response_ave.append(simps(y,x)/np.max(x))
#     pearson_coef, pearson_P = pearsonr(baseline, response_ave)
#     spearman_coef, spearman_P = spearmanr(baseline, response_ave)
#     lines.append(['TotalLoad', taxa, pearson_coef, pearson_P, spearman_coef, spearman_P])

# Assemble one row per (response taxon, baseline taxon) pair from the collected
# correlation results.
raw_columns = ['ResponseTaxa', 'BaselineTaxa', 'Corrcoef_pr', 'P_pr', 'Corrcoef_sm', 'P_sm']
df_res = pd.DataFrame(lines, columns=raw_columns)

# Benjamini-Hochberg FDR correction, applied separately to the Pearson and the
# Spearman p-values; element [1] of multipletests' return is the adjusted p-values.
df_res = df_res.assign(
    P_adj_pr=multipletests(df_res['P_pr'], method='fdr_bh')[1],
    P_adj_sm=multipletests(df_res['P_sm'], method='fdr_bh')[1],
)

# Place each adjusted p-value right after its raw counterpart.
df_res = df_res[
    [
        'ResponseTaxa', 'BaselineTaxa',
        'Corrcoef_pr', 'P_pr', 'P_adj_pr',
        'Corrcoef_sm', 'P_sm', 'P_adj_sm',
    ]
]

df_res

Unnamed: 0,ResponseTaxa,BaselineTaxa,Corrcoef_pr,P_pr,P_adj_pr,Corrcoef_sm,P_sm,P_adj_sm
0,Bacteroides-acidifaciens,Bacteroides-acidifaciens,0.353472,0.150169,0.150169,0.708978,0.0009869259,0.001973852
1,Muribaculaceae,Muribaculaceae,0.591655,0.009698,0.03879,0.49226,0.03796502,0.05062003
2,Akkermansia-muciniphila,Akkermansia-muciniphila,0.512612,0.029613,0.059227,0.075335,0.7663923,0.7663923
3,Bacteroides-uniformis,Bacteroides-uniformis,0.465647,0.051478,0.068637,0.93175,1.908298e-08,7.633191e-08
