In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import biom
import os
import scipy.stats

In [2]:
# Sample metadata: a tab-separated mapping file.
sample_md_fp = 'combined-map.tsv'
sample_md = pd.read_csv(
    sample_md_fp,
    sep='\t',
    index_col=0,   # the first column becomes the row index
    dtype=object,  # disable dtype inference so values such as '1' stay strings
)


In [3]:
data_dir = "../microbiome-data/run1-4/vsearch-100/cd_even5721/"

# Read one taxa summary table per level (L2 through L6). Each file is tab-separated, its
# first line is skipped, and its first column becomes the index (coerced to str). Every
# table is then transposed, so the file's column labels become the row index.
# NOTE(review): presumably rows are taxa and columns are sample IDs in the files -- confirm.
tax_tables = []
for level in range(2, 7):
    level_fp = os.path.join(data_dir, "taxa_plots/table_mc5721_sorted_L%d.txt" % level)
    level_df = pd.read_csv(level_fp, sep='\t', skiprows=1, index_col=0)
    level_df.index = level_df.index.astype(str)
    tax_tables.append(level_df.T)

# Put the per-level tables side by side: one wide table with a column per taxon.
tax_table = pd.concat(tax_tables, axis=1)

# Attach the taxa columns to the metadata by row index. merge defaults to an inner join,
# so only index values present in both frames are kept.
# NOTE(review): this rebinds sample_md, so running the cell a second time merges again.
sample_md = pd.merge(sample_md, tax_table, left_index=True, right_index=True)


In [4]:
from asd import filter_sample_md

# Autism-group stool samples at time point '1', re-indexed by SubjectID so they can be
# paired with the time point '4' samples of the same subject.
# NOTE(review): filter_sample_md is project-local; presumably it keeps the rows matching
# every (column, value) pair in `includes` -- confirm against asd.py.
initial_asd_stool = filter_sample_md(
    sample_md,
    includes=[('Group', 'autism'), ('SampleType', 'stool'), ('time_point', '1')]
).set_index('SubjectID')

# Same selection at time point '4'.
final_asd_stool = filter_sample_md(
    sample_md,
    includes=[('Group', 'autism'), ('SampleType', 'stool'), ('time_point', '4')]
).set_index('SubjectID')

In [5]:
# Donor stool samples (Group 'donor-initial'); the original sample index is kept.
donor_stool = filter_sample_md(
    sample_md,
    includes=[('Group', 'donor-initial'), ('SampleType', 'donor-stool')]
)

In [9]:
# Screen every taxon column for a pattern consistent with donor engraftment:
#   * the ASD median at time point 1 is below the donor median, and
#   * the ASD median at time point 4 is above the ASD median at time point 1.
# For each taxon that passes, test whether the per-subject change is greater than zero.
_data = []
for tax in tax_table.columns:
    initial_asd_median = initial_asd_stool[tax].median()
    final_asd_median = final_asd_stool[tax].median()
    donor_median = donor_stool[tax].median()
    if (initial_asd_median < donor_median) and (final_asd_median > initial_asd_median):
        # Both ASD frames are indexed by SubjectID, so the subtraction pairs samples by
        # subject. A subject present at only one time point yields NaN; drop those so a
        # single unpaired subject does not turn t and p into NaN.
        change = (final_asd_stool[tax] - initial_asd_stool[tax]).dropna()
        t, p = scipy.stats.ttest_1samp(change, popmean=0)
        # ttest_1samp reports a two-sided p-value. Halve it for the one-sided alternative
        # "mean change > 0"; a non-positive (or NaN) t is reported as p = 1.0 rather than
        # the exact 1 - p/2.
        p = p / 2 if t > 0 else 1.0
        # Check for zero explicitly: NumPy float scalars return inf with a RuntimeWarning
        # instead of raising ZeroDivisionError, so a try/except cannot be relied on here.
        if initial_asd_median == 0:
            fold = np.inf
        else:
            fold = donor_median / initial_asd_median
        _data.append([tax, fold, donor_median, initial_asd_median, final_asd_median, t, p])

# NOTE(review): the t column label says "two-sample", but the statistic above comes from a
# one-sample test on paired differences. The label is kept unchanged so it matches
# previously saved output.
tax_engraftment_df = pd.DataFrame(_data, columns=['Taxonomy', 'Fold donor enrichment', 'Donor median abundance',
                                                  'Initial ASD median abundance', 
                                                  'Final ASD median abundance', 't (one-tailed, two-sample)', 
                                                  'p-value']).set_index('Taxonomy')
# DataFrame.sort was removed from pandas; sort_values is its replacement. The sorted frame
# is only displayed -- tax_engraftment_df itself keeps the loop order.
tax_engraftment_df.sort_values('Fold donor enrichment', ascending=False)



Unnamed: 0_level_0,Fold donor enrichment,Donor median abundance,Initial ASD median abundance,Final ASD median abundance,"t (one-tailed, two-sample)",p-value
Taxonomy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria,inf,0.000358,0.0,0.00133,2.451107,0.012678
k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae,inf,0.000505,0.0,5.8e-05,2.33276,0.016103
k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;Other,inf,0.000506,0.0,5.6e-05,2.512891,0.011175
k__Bacteria;p__Tenericutes,inf,0.00137,0.0,0.000126,-0.704508,1.0
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__[Odoribacteraceae];g__Butyricimonas,inf,8.4e-05,0.0,0.004541,1.424984,0.086131
k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;Other;Other,inf,0.000506,0.0,5.6e-05,2.512891,0.011175
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Veillonellaceae;g__Dialister,128.540033,0.034461,0.000268,0.000458,-0.568737,1.0
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Lachnospira,5.878596,0.02491,0.004237,0.006103,0.656165,0.260251
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Butyricicoccus,5.455129,0.00234,0.000429,0.000898,1.794916,0.045234
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__,5.085562,0.016714,0.003287,0.020933,2.280382,0.017879


The results of this analysis are in a [Google Sheet](https://docs.google.com/spreadsheets/d/1iwU6wQ9JApx7I6H4D53XFDFRu_Yige2HaiC88YkjqW0/edit?usp=sharing).