In [147]:
import itertools as it
import os

import biom
from matplotlib import rcParams
import matplotlib.colors as mplc
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sn
import statsmodels.api as sms
import statsmodels.formula.api as smf
import skbio

from qiime2 import Artifact, Metadata, MetadataColumn

In [148]:
# Console display of numpy arrays: five decimals, no scientific float notation.
np.set_printoptions(suppress=True, precision=5)

# Figure fonts: preferred sans-serif faces, tried in the order listed, and the
# font type written into PDF output (matplotlib's 'pdf.fonttype' setting).
rcParams['pdf.fonttype'] = 42
rcParams['font.sans-serif'] = ['Helvetica', 'Arial']

In [149]:
# IPython magic selecting matplotlib's inline backend for this notebook.
%matplotlib inline

In [150]:
# Load the simulated-sample metadata. Every column is read as a string, which is
# why the 'set' filter below compares against the string '1'.
meta = (
    pd.read_csv('data/output/simulation/samples/metadata.tsv', sep='\t', dtype=str)
    .set_index('sample-id')
)

# Keep only the samples in set '1'. The explicit copy makes `meta` an independent
# frame, so adding a column below does not write into a slice of the full table
# (which raises pandas' SettingWithCopyWarning).
meta = meta.loc[meta['set'] == '1'].copy()

# Per-sample colour keyed on age group; any age value not listed here is passed
# through unchanged by `replace`.
meta['color'] = meta['age'].replace({'infant': '#1f78b4',
                                     'adult': '#e31a1c'})

In [151]:
# Method labels: used to build the input paths and, later, as the ordered
# category levels for the 'method' column of the summary table.
methods = [
    'reference',
    'otus',
    'asvs',
    'sidle',
]

In [152]:
# Beta-diversity metric labels: used to build the input paths and to order the
# rows of the summary table.
metrics = [
    'unweighted-unifrac',
    'weighted-unifrac',
    'braycurtis',
    'genus-braycurtis',
]

In [153]:
def _load_distance_matrix(method, metric, i):
    """Load iteration `i` of the rarefied distance matrix for a method/metric pair.

    The QIIME 2 artifact at the derived path is viewed as a `skbio.DistanceMatrix`.
    """
    artifact = Artifact.load(f'data/output/simulation/merged/{method}'
                             f'/rarified-beta/{metric}/{i}.qza')
    return artifact.view(skbio.DistanceMatrix)


def _load_adonis_table(method, metric, i):
    """Read iteration `i` of the adonis result table for a method/metric pair.

    The file is tab-separated and its first column becomes the row index.
    """
    return pd.read_csv(f'data/output/simulation/merged/'
                       f'{method}/rarified-beta/{metric}-adonis/{i}.tsv',
                       sep='\t', index_col=0)


# method -> metric -> list of the five distance matrices (iterations 0..4).
beta_values = {
    method: {
        metric: [_load_distance_matrix(method, metric, i) for i in range(5)]
        for metric in metrics
    }
    for method in methods
}

# method -> metric -> one table with the five adonis results placed side by side
# as columns (iterations 0..4, left to right).
adonis_values = {
    method: {
        metric: pd.concat(
            objs=[_load_adonis_table(method, metric, i) for i in range(5)],
            axis=1,
        )
        for metric in metrics
    }
    for method in methods
}

In [154]:
# Inspect the adonis tables loaded for the 'reference' method (a dict keyed by metric).
adonis_values['reference']

{'unweighted-unifrac':               age         age         age         age         age
 R2       0.682843    0.666099    0.681672    0.682048    0.673309
 p_999    0.001000    0.001000    0.001000    0.001000    0.001000
 F      124.874884  115.704375  124.201807  124.417344  119.537992,
 'weighted-unifrac':               age         age         age         age         age
 R2       0.865155    0.863171    0.861985    0.862505    0.860158
 p_999    0.001000    0.001000    0.001000    0.001000    0.001000
 F      372.122775  365.886044  362.244583  363.832410  356.753715,
 'braycurtis':               age         age         age         age         age
 R2       0.835040    0.834237    0.835219    0.836091    0.835154
 p_999    0.001000    0.001000    0.001000    0.001000    0.001000
 F      293.600945  291.896593  293.981830  295.854941  293.844493,
 'genus-braycurtis':               age         age         age         age         age
 R2       0.862003    0.861307    0.861921    0.86

In [155]:
# Distance matrices every method is compared against. Bound here explicitly so
# the cell does not depend on a `reference` variable left over in the kernel.
reference = beta_values['reference']

# Run each Mantel test exactly once and keep its (statistic, p-value) pair.
# The test is permutation based, so calling it separately for r and for p
# doubles the run time and reports values that come from different runs.
# Pairs where both matrices are equal (the reference compared with itself) are
# skipped.
mantel_pairs = {
    method: {
        metric: [
            skbio.stats.distance.mantel(x, y)[:2]
            for x, y in it.product(reference[metric], dms)
            if (x != y)
        ]
        for metric, dms in method_dms.items()
    }
    for method, method_dms in beta_values.items()
}

# method -> DataFrame of Mantel statistics (one column per metric, one row per
# compared pair of matrices).
corr = {
    method: pd.DataFrame({
        metric: pd.Series([statistic for statistic, _ in pairs])
        for metric, pairs in by_metric.items()
    })
    for method, by_metric in mantel_pairs.items()
}

# Same layout as `corr`, holding the p-value of each of those tests.
corr_p = {
    method: pd.DataFrame({
        metric: pd.Series([p_value for _, p_value in pairs])
        for metric, pairs in by_metric.items()
    })
    for method, by_metric in mantel_pairs.items()
}

In [156]:
def _adonis_row(row_label):
    """Pull one labelled row out of every adonis table.

    Returns a dict keyed by method; each value is a DataFrame with one column
    per metric holding the `row_label` row of that metric's adonis table.
    """
    return {
        method: pd.DataFrame({
            metric: table.loc[row_label]
            for metric, table in per_metric.items()
        })
        for method, per_metric in adonis_values.items()
    }


# 'R2' and 'p_999' rows of the adonis tables, per method and metric.
adonis_r = _adonis_row('R2')
adonis_p = _adonis_row('p_999')

In [157]:
def _summarise_by_method(tables, statistic):
    """Reduce each per-method table column-wise and stack the results.

    `tables` maps method -> DataFrame with one column per metric. `statistic`
    names the DataFrame reduction to call ('mean', 'std' or 'max'). The result
    is a Series indexed by (method, metric).
    """
    reduced = {
        method: getattr(table, statistic)(axis=0)
        for method, table in tables.items()
    }
    return pd.DataFrame(reduced).unstack()


# One column per (test, quantity, summary) triple, one row per (method, metric).
distance_comparisons = pd.DataFrame({
    ('adonis', 'R2', 'mean'): _summarise_by_method(adonis_r, 'mean'),
    ('adonis', 'R2', 'std'): _summarise_by_method(adonis_r, 'std'),
    ('adonis', 'p', 'max'): _summarise_by_method(adonis_p, 'max'),
    ('mantel', 'R', 'mean'): _summarise_by_method(corr, 'mean'),
    ('mantel', 'R', 'std'): _summarise_by_method(corr, 'std'),
    ('mantel', 'p', 'max'): _summarise_by_method(corr_p, 'max'),
})

# Name the two index levels, then move them into columns so they can be recast.
distance_comparisons.index = distance_comparisons.index.set_names(['method', 'metric'])
distance_comparisons = distance_comparisons.reset_index()

# Recast both labels as categoricals restricted to the known levels.
distance_comparisons['metric'] = distance_comparisons['metric'].astype(pd.CategoricalDtype(categories=metrics))
distance_comparisons['method'] = pd.Categorical(distance_comparisons['method'], categories=methods, ordered=True)

# Index by (metric, method) and pull the rows out in the order given by `metrics`.
distance_comparisons = distance_comparisons.set_index(['metric', 'method'])
distance_comparisons = distance_comparisons.loc[metrics]

In [158]:
# Create the destination folder if needed, so the export also works on a fresh
# checkout where 'data/output/tables' does not exist yet.
os.makedirs('data/output/tables', exist_ok=True)

# Write the summary table, rounded to three decimals, as a tab-separated file.
distance_comparisons.round(3).to_csv('data/output/tables/table_2_beta_rarefaction.tsv', sep='\t')

In [159]:
 # Scratch check: sort a categorical whose category order differs from the
 # order of its values. `a` is not used by any other visible cell.
 a = pd.Categorical(
     values=pd.Series(['1', '2', '3']),
     categories=['2', '1', '3'],
 ).sort_values()

In [174]:
def _welch_p_vs_sidle(method, metric):
    """p-value of an unequal-variance t-test between sidle's and `method`'s
    Mantel statistics for `metric`."""
    sidle_r = corr['sidle'][metric]
    other_r = corr[method][metric]
    return scipy.stats.ttest_ind(sidle_r, other_r, equal_var=False).pvalue


def _sidle_mean_is_higher(method, metric):
    """True when sidle's mean Mantel statistic for `metric` exceeds `method`'s."""
    return corr['sidle'][metric].mean() > corr[method][metric].mean()


# Rows: compared method ('otus', 'asvs'); columns: metric.
compare_p = pd.DataFrame({
    metric: {method: _welch_p_vs_sidle(method, metric) for method in ['otus', 'asvs']}
    for metric in metrics
})

# Same layout, holding the direction of each difference.
# NOTE(review): the name is misspelt ('comapare'); kept as-is because other
# cells may refer to it.
comapare_dir = pd.DataFrame({
    metric: {method: _sidle_mean_is_higher(method, metric) for method in ['otus', 'asvs']}
    for metric in metrics
})

In [184]:
# Flatten the p-value table so all tests are corrected together as one family.
_stacked_p = compare_p.unstack()

# Element [1] of the multipletests result holds the corrected p-values
# (method 'fdr_bh').
_adjusted_p = sms.stats.multipletests(_stacked_p, method='fdr_bh')[1]

# Restore the table layout by re-attaching the flattened index and unstacking.
compare_p_corected = pd.Series(_adjusted_p, index=_stacked_p.index).unstack()

In [185]:
# Display the multiple-testing-corrected p-values.
compare_p_corected

Unnamed: 0,otus,asvs
unweighted-unifrac,0.001153767,9.726326e-09
weighted-unifrac,6.843749e-24,8.831601e-27
braycurtis,2.932583e-20,6.843749e-24
genus-braycurtis,0.05162973,8.608062e-09


In [178]:
# Display the adonis / Mantel summary table.
distance_comparisons


Unnamed: 0_level_0,Unnamed: 1_level_0,adonis,adonis,adonis,mantel,mantel,mantel
Unnamed: 0_level_1,Unnamed: 1_level_1,R2,R2,p,R,R,p
Unnamed: 0_level_2,Unnamed: 1_level_2,mean,std,max,mean,std,max
metric,method,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
unweighted-unifrac,reference,0.677194,0.007309,0.001,0.983573,0.000811,0.001
unweighted-unifrac,otus,0.669261,0.011064,0.001,0.978613,0.001559,0.001
unweighted-unifrac,asvs,0.618091,0.012397,0.001,0.976444,0.002079,0.001
unweighted-unifrac,sidle,0.678519,0.009678,0.001,0.980215,0.001672,0.001
weighted-unifrac,reference,0.862595,0.001817,0.001,0.998678,0.000167,0.001
weighted-unifrac,otus,0.842006,0.001474,0.001,0.975149,0.000498,0.001
weighted-unifrac,asvs,0.826064,0.001132,0.001,0.974331,0.000323,0.001
weighted-unifrac,sidle,0.840747,0.001214,0.001,0.978084,0.000561,0.001
braycurtis,reference,0.835148,0.000659,0.001,0.998892,5.4e-05,0.001
braycurtis,otus,0.826163,0.0012,0.001,0.998351,6.7e-05,0.001
