### This notebook is to use spearman correlation to check association between 1,25-(OH)2D and interested taxa

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from scipy.stats import spearmanr, pearsonr
from statsmodels.sandbox.stats.multicomp import multipletests

import matplotlib.pylab as plt
%matplotlib inline

### merge taxonomy with mapping file

In [2]:
taxa = pd.read_csv('../data/RF_taxa_125.txt', sep='\t', index_col='#OTU ID')

In [3]:
print(taxa.shape)
taxa.head()

(7, 2)


Unnamed: 0_level_0,Taxon,importance
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1
TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.002447
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.002371
TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC,k__Bacteria; p__Actinobacteria; c__Coriobacter...,0.002052
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGGACGATAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.002284
TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGAGGGCAAGTCAGATGTGAAATCCACGGGCTCAACTCGTGAACTGCATTTGAAACTACTCTTCTTGAGTATCGGAGAGGCAATCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGATTGCTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAAC,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.002257


In [4]:
otu = pd.read_csv('../Qiime_updated/feature-table-rare5807.txt', sep='\t', skiprows=1, index_col='#OTU ID',dtype = str)
print(otu.shape)
otu.head()

(39885, 599)


Unnamed: 0_level_0,MN1696,PI4923,PA3754,PI4717,PA3960,MN1521,PA3828,PO6848,PA3199,PO7473,...,PO7027,BI0686,MN2037,PO7199,MN2405,MN2498,BI0397,SD8175,PO7488,PO6966
#OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAAC,2011.0,0.0,32.0,37.0,0.0,0.0,18.0,111.0,0.0,157.0,...,0.0,0.0,0.0,256.0,0.0,0.0,0.0,0.0,438.0,443.0
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAAC,520.0,250.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,916.0,0.0,235.0,0.0,0.0,51.0,0.0,0.0,0.0
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTAGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAAC,457.0,66.0,0.0,14.0,64.0,0.0,0.0,0.0,295.0,21.0,...,31.0,0.0,0.0,8.0,123.0,242.0,0.0,131.0,0.0,10.0
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTTTTAAGTCAGCGGTGAAAGTCTGTGGCTCAACCATAGAATTGCCGTTGAAACTGGGGGGCTTGAGTATGTTTGAGGCAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTGCCAAGCCATTACTGACGCTGATGCACGAAAGCGTGGGGATCAAAC,460.0,0.0,0.0,160.0,0.0,0.0,93.0,0.0,0.0,0.0,...,70.0,119.0,0.0,0.0,41.0,0.0,0.0,0.0,9.0,463.0
AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACCAACTGACGCTGAGGCTCGAAAGTGTGGGTAGCAAAC,380.0,108.0,12.0,0.0,13.0,253.0,90.0,120.0,0.0,0.0,...,304.0,0.0,0.0,0.0,0.0,287.0,26.0,163.0,0.0,315.0


In [5]:
# subset otu table with intersted ones only
otu_sub = otu.loc[taxa.index].transpose()
print(otu_sub.shape)
otu_sub.head()

(599, 7)


#OTU ID,TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGGACGATAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGAGGGCAAGTCAGATGTGAAATCCACGGGCTCAACTCGTGAACTGCATTTGAAACTACTCTTCTTGAGTATCGGAGAGGCAATCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGATTGCTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGTGTAGGCGGGATATCAAGTCAGAAGTGAAAATTACGGGCTCAACTCGTAACCTGCTTTTGAAACTGACATTCTTGAGTGAAGTAGAGGCAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGGCTTTTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGTTAATTAAGTTGGATGTGAAATTCCCGGGCTTAACTTGGGAGCTGCATTCAAAACTGGTTAACTAGAGTTCAGGAGAGGGAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTTCTGGACTGACACTGACGCTGAGGCACGAAAGCGTGGGGAGCAAAC
MN1696,151.0,12.0,6.0,1.0,0.0,0.0,0.0
PI4923,289.0,0.0,0.0,0.0,209.0,0.0,0.0
PA3754,61.0,1.0,7.0,0.0,41.0,111.0,0.0
PI4717,130.0,0.0,4.0,6.0,0.0,1.0,0.0
PA3960,120.0,1.0,1.0,0.0,0.0,0.0,0.0


In [6]:
mf = pd.read_csv('../data/mros_mapping_alpha.txt', sep='\t', index_col='#SampleID')

In [7]:
mf = mf[['OHV1D3', 'OHV24D3', 'OHVD3', 'ratio_activation', 'ratio_catabolism']]
print(mf.shape)
mf.head()

(599, 5)


Unnamed: 0_level_0,OHV1D3,OHV24D3,OHVD3,ratio_activation,ratio_catabolism
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BI0023,0.0393,1.77,25.8,0.001523,0.068605
BI0056,0.0619,3.91,39.2,0.001579,0.099745
BI0131,0.0521,1.49,23.1,0.002255,0.064502
BI0153,0.0431,2.14,27.3,0.001579,0.078388
BI0215,0.0502,3.62,33.0,0.001521,0.109697


In [8]:
dat = pd.merge(mf, otu_sub, left_index=True, right_index=True)
print(dat.shape)
dat.head()

(599, 12)


Unnamed: 0,OHV1D3,OHV24D3,OHVD3,ratio_activation,ratio_catabolism,TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGGACGATAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGAGGGCAAGTCAGATGTGAAATCCACGGGCTCAACTCGTGAACTGCATTTGAAACTACTCTTCTTGAGTATCGGAGAGGCAATCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGATTGCTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGTGTAGGCGGGATATCAAGTCAGAAGTGAAAATTACGGGCTCAACTCGTAACCTGCTTTTGAAACTGACATTCTTGAGTGAAGTAGAGGCAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGGCTTTTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGTTAATTAAGTTGGATGTGAAATTCCCGGGCTTAACTTGGGAGCTGCATTCAAAACTGGTTAACTAGAGTTCAGGAGAGGGAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTTCTGGACTGACACTGACGCTGAGGCACGAAAGCGTGGGGAGCAAAC
BI0023,0.0393,1.77,25.8,0.001523,0.068605,56.0,0.0,5.0,0.0,125.0,0.0,16.0
BI0056,0.0619,3.91,39.2,0.001579,0.099745,50.0,4.0,2.0,0.0,0.0,24.0,0.0
BI0131,0.0521,1.49,23.1,0.002255,0.064502,39.0,0.0,0.0,0.0,18.0,10.0,6.0
BI0153,0.0431,2.14,27.3,0.001579,0.078388,176.0,0.0,9.0,0.0,114.0,0.0,0.0
BI0215,0.0502,3.62,33.0,0.001521,0.109697,150.0,0.0,1.0,0.0,0.0,0.0,0.0


In [9]:
# convert to float type
dat = dat.apply(pd.to_numeric, errors='coerce')
dat.describe()

Unnamed: 0,OHV1D3,OHV24D3,OHVD3,ratio_activation,ratio_catabolism,TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGGACGATAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGAGGGCAAGTCAGATGTGAAATCCACGGGCTCAACTCGTGAACTGCATTTGAAACTACTCTTCTTGAGTATCGGAGAGGCAATCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGATTGCTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGTGTAGGCGGGATATCAAGTCAGAAGTGAAAATTACGGGCTCAACTCGTAACCTGCTTTTGAAACTGACATTCTTGAGTGAAGTAGAGGCAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGGCTTTTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGTTAATTAAGTTGGATGTGAAATTCCCGGGCTTAACTTGGGAGCTGCATTCAAAACTGGTTAACTAGAGTTCAGGAGAGGGAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTTCTGGACTGACACTGACGCTGAGGCACGAAAGCGTGGGGAGCAAAC
count,567.0,567.0,556.0,556.0,556.0,599.0,599.0,599.0,599.0,599.0,599.0,599.0
mean,0.057775,3.430864,35.229137,0.001772,0.094776,92.245409,1.883139,6.088481,2.981636,43.652755,60.943239,7.013356
std,0.019773,1.834771,12.450758,0.000735,0.02977,102.259858,3.704788,31.11577,17.456434,93.616807,127.139878,32.079539
min,0.0107,0.3,7.8,0.000398,0.018788,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0441,2.175,27.4,0.001316,0.074216,1.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0555,3.18,33.65,0.00166,0.092821,75.0,0.0,1.0,0.0,0.0,6.0,0.0
75%,0.0663,4.235,41.825,0.002081,0.112849,148.0,2.0,5.0,2.0,41.0,60.0,1.0
max,0.156,14.07,104.0,0.006727,0.197786,1228.0,32.0,615.0,381.0,915.0,1055.0,484.0


In [10]:
dat[dat.columns[7]].dtype

dtype('float64')

### correlation

In [11]:
otu_cols = dat.columns[mf.shape[1]:dat.shape[1]]
otu_cols

Index(['TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC',
       'TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC',
       'TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC',
       'TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGG

In [12]:
vars_vd = np.array(['OHV1D3', 'OHV24D3', 'OHVD3', 'ratio_activation', 'ratio_catabolism'])

In [13]:
results= []
i = 0
print(vars_vd[i])
for j in range(len(otu_cols)):
    tmp = dat[[vars_vd[i], otu_cols[j]]].dropna(axis=0, how='any')
    rho, pval = spearmanr(tmp[vars_vd[i]], tmp[otu_cols[j]])
    tax = taxa['Taxon'][otu_cols[j]]
    results.append([vars_vd[i], otu_cols[j], tax, rho, pval])
    results.append([vars_vd[i], otu_cols[j], rho, pval])

# output table    
results = pd.DataFrame(results, columns=['vars', 'otu', 'tax',
                                         'rho', 'pval']).dropna(axis=0, how='any')
results['fdr pval'] = multipletests(results['pval'], method = 'fdr_bh')[1]
results = results.sort_values(['fdr pval'], ascending=True)

# specific bacteria
index = results.loc[results['fdr pval'] <= 0.05].index
for i in range(len(index)):
    print(results.tax[index[i]], results['rho'][index[i]])

OHV1D3
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__ 0.19262928366201476
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__ -0.17774717622629013
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Christensenellaceae; g__; s__ 0.16856828105409885
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__; s__ 0.15339088959899927
k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Collinsella; s__aerofaciens 0.1246241651960069
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__ -0.11948309970002789


In [14]:
results

Unnamed: 0,vars,otu,tax,rho,pval,fdr pval
10,OHV1D3,TACGTAGGTGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.192629,4e-06,2.7e-05
6,OHV1D3,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,-0.177747,2.1e-05,7.2e-05
12,OHV1D3,TACGTAGGGGGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.168568,5.5e-05,0.000128
8,OHV1D3,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.153391,0.000246,0.000431
4,OHV1D3,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCG...,k__Bacteria; p__Actinobacteria; c__Coriobacter...,0.124624,0.002953,0.004134
2,OHV1D3,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,-0.119483,0.004385,0.005116
0,OHV1D3,TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTG...,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.071761,0.087787,0.087787


In [15]:
results.to_csv('../data/correlation_125.txt', sep='\t')

In [16]:
dat.head()

Unnamed: 0,OHV1D3,OHV24D3,OHVD3,ratio_activation,ratio_catabolism,TACGGAGGATTCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAAGTTAGAGGTGAAATTTCGGGGCTCAACCCTGAACGTGCCTCTAATACTGTTGAGCTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTAGAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACGTTGAGGCACGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTCAAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGGAGCGAAC,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTAAAGCAAGTCTGAAGTGAAAGCCCGCGGCTCAACTGCGGGACTGCTTTGGAAACTGTTTAACTGGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTACTGGACGATAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGAGGGCAAGTCAGATGTGAAATCCACGGGCTCAACTCGTGAACTGCATTTGAAACTACTCTTCTTGAGTATCGGAGAGGCAATCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGATTGCTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAAC,TACGTAGGTGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGTGTAGGCGGGATATCAAGTCAGAAGTGAAAATTACGGGCTCAACTCGTAACCTGCTTTTGAAACTGACATTCTTGAGTGAAGTAGAGGCAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGGCTTTTACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAAC,TACGTAGGGGGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGTTAATTAAGTTGGATGTGAAATTCCCGGGCTTAACTTGGGAGCTGCATTCAAAACTGGTTAACTAGAGTTCAGGAGAGGGAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTTCTGGACTGACACTGACGCTGAGGCACGAAAGCGTGGGGAGCAAAC
BI0023,0.0393,1.77,25.8,0.001523,0.068605,56.0,0.0,5.0,0.0,125.0,0.0,16.0
BI0056,0.0619,3.91,39.2,0.001579,0.099745,50.0,4.0,2.0,0.0,0.0,24.0,0.0
BI0131,0.0521,1.49,23.1,0.002255,0.064502,39.0,0.0,0.0,0.0,18.0,10.0,6.0
BI0153,0.0431,2.14,27.3,0.001579,0.078388,176.0,0.0,9.0,0.0,114.0,0.0,0.0
BI0215,0.0502,3.62,33.0,0.001521,0.109697,150.0,0.0,1.0,0.0,0.0,0.0,0.0


In [17]:
# check
dat.rename(columns={dat.columns[5]: taxa.Taxon[dat.columns[5]],
                    dat.columns[6]: taxa.Taxon[dat.columns[6]],
                    dat.columns[7]: taxa.Taxon[dat.columns[7]],
                    dat.columns[8]: taxa.Taxon[dat.columns[8]],
                    dat.columns[9]: taxa.Taxon[dat.columns[9]],
                    dat.columns[10]: taxa.Taxon[dat.columns[10]],
                    dat.columns[11]: taxa.Taxon[dat.columns[11]]}, inplace=True)
dat.head()

Unnamed: 0,OHV1D3,OHV24D3,OHVD3,ratio_activation,ratio_catabolism,k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Rikenellaceae; g__; s__,k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__,k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Collinsella; s__aerofaciens,k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__.1,k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__; s__,k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__,k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Christensenellaceae; g__; s__
BI0023,0.0393,1.77,25.8,0.001523,0.068605,56.0,0.0,5.0,0.0,125.0,0.0,16.0
BI0056,0.0619,3.91,39.2,0.001579,0.099745,50.0,4.0,2.0,0.0,0.0,24.0,0.0
BI0131,0.0521,1.49,23.1,0.002255,0.064502,39.0,0.0,0.0,0.0,18.0,10.0,6.0
BI0153,0.0431,2.14,27.3,0.001579,0.078388,176.0,0.0,9.0,0.0,114.0,0.0,0.0
BI0215,0.0502,3.62,33.0,0.001521,0.109697,150.0,0.0,1.0,0.0,0.0,0.0,0.0


In [18]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=0.19262928366201476, pvalue=3.836328870511795e-06)

In [19]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=array([[ 1.        , -0.1194831 , -0.17774718],
       [-0.1194831 ,  1.        ,  0.3248879 ],
       [-0.17774718,  0.3248879 ,  1.        ]]), pvalue=array([[0.00000000e+00, 4.38517803e-03, 2.07116211e-05],
       [4.38517803e-03, 0.00000000e+00, 2.09903585e-15],
       [2.07116211e-05, 2.09903585e-15, 0.00000000e+00]]))

In [20]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Christensenellaceae; g__; s__']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=0.16856828105409885, pvalue=5.484660631719191e-05)

In [21]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__; s__']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=0.15339088959899927, pvalue=0.0002462150444195747)

In [22]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Collinsella; s__aerofaciens']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=0.1246241651960069, pvalue=0.0029531935642018786)

In [23]:
tmp = dat[['OHV1D3', 'k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Rikenellaceae; g__; s__']].dropna(axis=0, how='any')
spearmanr(tmp[tmp.columns[0]], tmp[tmp.columns[1]])

SpearmanrResult(correlation=0.0717610361397878, pvalue=0.08778725235208422)