In [1]:
import qiime2 as q2
import biom as biom
import pandas as pd
from skbio import (DistanceMatrix, OrdinationResults)
from qiime2.plugins.feature_table.methods import rarefy
from qiime2.plugins.diversity.actions import (beta_phylogenetic, beta, pcoa)
from qiime2.plugins.deicode.actions import rpca
from assets.step_wise_anova import run_stepwise_anova


  import pandas.util.testing as pdt


In [2]:
# init
# NOTE: the rarefaction depth is defined per protocol inside `protocols`
# below. A former module-level `rare = 5000` was shadowed by the loop variable
# of the same name and never read, so it has been dropped.
minfeat = 2 # features with a total count <= minfeat are removed
# covariates handed to the stepwise ANOVA for every ordination
anova_factors = ['bead_beating', 'biomass_sample',
                 'extraction_protocol',
                 'host_subject_id', 'sample_type_2']
# encode different protocols as
#   name -> [rarefaction depth, feature table, sample metadata, phylogeny]
protocols = {}
protocols['16S'] = [5000,
                    '../data/%s/extraction_test_round_3_biom_lod.qza' % ('16S'),
                    '../data/%s/metadata_12201_round3_qiitaIDs_2020.08.12_qiime2.txt' % ('16S'),
                    '../data/%s/extraction_test_round3_16S_150bp_deblur_insertion_tree.tre' % ('16S')]
protocols['metagenomic'] = [17000,
                            '../data/%s/gotu_profile_updated_sampleIDs_min17K.qza' % ('Metagenomic'),
                            '../data/%s/metadata_12201_round3_qiitaIDs_2020.08.17_qiime2.txt' % ('Metagenomic'),
                            '../data/%s/phylogeny.qza' % ('Metagenomic')]
# repeat both protocols
for protocol, (rare, table, metadata, tree) in protocols.items():
    # import data
    table = q2.Artifact.load(table)
    # A tree that is already a .qza artifact is loaded directly; any other
    # file is imported as a rooted phylogeny. The file extension is tested
    # (rather than searching the whole path for the substring 'qza') so that a
    # directory or file name that merely contains "qza" cannot misroute it.
    if tree.endswith('.qza'):
        tree = q2.Artifact.load(tree)
    else:
        tree = q2.Artifact.import_data('Phylogeny[Rooted]', tree)
    metadata = q2.Metadata.load(metadata)
    # convert the metadata once per protocol instead of once per ordination
    mf_all = metadata.to_dataframe()
    # filter and rare
    bt = table.view(biom.Table)
    # keep samples with at least `rare` total counts
    bt = bt.filter(bt.ids('sample')[bt.sum('sample') >= rare])
    # keep features with more than `minfeat` total counts
    bt = bt.filter(bt.ids('observation')[bt.sum('observation') > minfeat], 'observation')
    table = q2.Artifact.import_data('FeatureTable[Frequency]', bt)
    # rarefy
    rarTable = rarefy(table, rare).rarefied_table
    # unifrac/jaccard (all computed on the rarefied table)
    weighted_unifrac_dist = beta_phylogenetic(rarTable, tree, 'weighted_unifrac').distance_matrix
    unweighted_unifrac_dist = beta_phylogenetic(rarTable, tree, 'unweighted_unifrac').distance_matrix
    jaccard_dist = beta(rarTable, 'jaccard').distance_matrix
    weighted_unifrac_pcoa = pcoa(weighted_unifrac_dist).pcoa
    unweighted_unifrac_pcoa = pcoa(unweighted_unifrac_dist).pcoa
    jaccard_pcoa = pcoa(jaccard_dist).pcoa
    # RPCA -- note this takes the filtered but un-rarefied table
    (rpca_biplot, rpca_dist) = rpca(table)
    # encode all results: metric name -> sample coordinates
    ordinations = {'weighted_unifrac':weighted_unifrac_pcoa.view(OrdinationResults).samples,
                   'unweighted_unifrac':unweighted_unifrac_pcoa.view(OrdinationResults).samples,
                   'jaccard':jaccard_pcoa.view(OrdinationResults).samples,
                   'RPCA':rpca_biplot.view(OrdinationResults).samples}
    # per-metric results are collected under a separate name so that
    # `effect_size` only ever refers to the merged DataFrame
    effect_size_by_metric = {}
    # run stepwise RDA on all ordinations
    for ord_type, orddf in ordinations.items():
        # use the columns labelled 0, 1 and 2
        # NOTE(review): assumes the axes carry integer labels -- confirm for
        # the installed scikit-bio version
        orddf = orddf[[0,1,2]]
        # align metadata rows to the ordination's samples; reindex returns a
        # new frame, so no explicit copy is required
        mf = mf_all.reindex(orddf.index)
        effect_size_df_tmp = run_stepwise_anova(orddf, mf, anova_factors)
        # strip the '+ ' marker from the row labels
        effect_size_df_tmp.index = [i.replace('+ ', '') for i in effect_size_df_tmp.index]
        effect_size_by_metric[ord_type] = effect_size_df_tmp
    # merge and save (the dict keys become the outer index level)
    effect_size = pd.concat(effect_size_by_metric)
    effect_size.to_csv('../results/%s-Stepwise-ANOVA.csv' % (protocol))




Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: /Users/cmartino/Dropbox/bin/kit-compare-updated/code/assets/stepwise-rda.R /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmp01hio3y6/ord_.tsv /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmp01hio3y6/mf_.txt /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmp01hio3y6/output.effect.size.tsv

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: /Users/cmartino/Dropbox/bin/kit-compare-updated/code/assets/stepwise-rda.R /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmppfk6wlzs/ord_.tsv /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmppfk6wlzs/mf_.txt /



Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: /Users/cmartino/Dropbox/bin/kit-compare-updated/code/assets/stepwise-rda.R /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmpkwu7soi8/ord_.tsv /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmpkwu7soi8/mf_.txt /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmpkwu7soi8/output.effect.size.tsv

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: /Users/cmartino/Dropbox/bin/kit-compare-updated/code/assets/stepwise-rda.R /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmpq4e0x4to/ord_.tsv /var/folders/tt/zz8n1d_55_n6r7czk3k1qwcw0000gp/T/tmpq4e0x4to/mf_.txt /