# Description

* The relatively abundant taxa seem to be present in all gradient fractions.
  * \>0.1% pre-fractionation abundance (at least for one 12C-Con gradient)
* Goal: 
  * For all 12C-Con gradients determine the detection threshold:
    * What is the % abundance cutoff below which taxa are no longer detected in all gradients?

# Setting parameters

In [2]:
%load_ext rpy2.ipython

In [3]:
%%R
# Directory containing the saved phyloseq objects; the file names below are
# joined onto it with file.path() when an object is loaded.
physeqDir <- '/var/seq_data/fullCyc/MiSeq_16SrRNA/515f-806r/lib1-7/phyloseq/'

# File name of the bulk-core phyloseq object (read with readRDS()).
physeq_bulkCore <- 'bulk-core'

# File name of the SIP-core object.
# NOTE(review): presumably the SIP-fraction counterpart of the bulk object;
# it is not read in the cells immediately following -- confirm where it is used.
physeq_SIP_core <- 'SIP-core_unk'

# Init

In [4]:
%%R
library(dplyr)
library(tidyr)
library(ggplot2)
library(phyloseq)

Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)


# Mapping bulk and SIP data

In [5]:
%%R
# Build the path to the bulk-core object and deserialize it.
physeq.file <- file.path(physeqDir, physeq_bulkCore)
physeq <- readRDS(physeq.file)

# Keep the sample metadata table at hand; it drives the sample filtering
# in the next cell.
physeq.m <- sample_data(physeq)

# Bare name: echo the object summary as the cell output.
physeq

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4950 taxa and 9 samples ]
sample_data() Sample Data:       [ 9 samples by 17 sample variables ]
tax_table()   Taxonomy Table:    [ 4950 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4950 tips and 4949 internal nodes ]


In [7]:
%%R 
# Restrict the data set to the bulk-microcosm samples plus the SIP samples
# from 12C-Con gradients, then discard every taxon whose total count across
# the retained samples is zero.
# NOTE(review): the recorded output of this cell still lists all 9 samples,
# while the later sample check reports 27 -- the stored outputs appear to come
# from different runs; re-execute to confirm.
physeq.f <- physeq %>%
    prune_samples(
        physeq.m$Exp_type == 'microcosm_bulk' |
            (physeq.m$Exp_type == 'SIP' & physeq.m$Substrate == '12C-Con'),
        .
    ) %>%
    filter_taxa(function(x) sum(x) > 0, prune = TRUE)

# Metadata of the retained samples as a plain data.frame; the detour through
# as.matrix() strips the phyloseq sample_data class.
physeq.f.m <- physeq.f %>%
    sample_data() %>%
    as.matrix() %>%
    as.data.frame()

# Bare name: echo the filtered object summary as the cell output.
physeq.f

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4950 taxa and 9 samples ]
sample_data() Sample Data:       [ 9 samples by 17 sample variables ]
tax_table()   Taxonomy Table:    [ 4950 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 4950 tips and 4949 internal nodes ]


In [103]:
%%R
# Sanity check: number of retained samples for each combination of
# experiment type, day and substrate.
physeq.f.m %>% as.data.frame() %>%
    group_by(Exp_type, Day, Substrate) %>%
    summarise(n = n())

Source: local data frame [2 x 4]
Groups: Exp_type, Day [?]

        Exp_type    Day Substrate     n
          (fctr) (fctr)    (fctr) (int)
1 microcosm_bulk      1   12C-Con     1
2            SIP      1   12C-Con    26
