# Goal

* A basic, full run of the SIPSim pipeline with the whole dataset to see:
  * Does it work?
  * Does the output at each stage look OK?

# Setting variables

In [1]:
# Filesystem locations referenced by the rest of the notebook.
# All three live under the same SIPSim checkout, so they are built from one root.
_sipsim_root = '/home/nick/notebook/SIPSim/'

# Directory for this run (the bac_genome1210 dataset).
workDir = _sipsim_root + 'dev/bac_genome1210/'
# Genome directory nested inside the run directory.
genomeDir = workDir + 'genomes/'
# Directory of the R scripts; the pipeline template below has an R_dir
# placeholder that it prepends to PATH.
R_dir = _sipsim_root + 'lib/R/'

# Init

In [2]:
import glob
from os.path import abspath
import nestly

In [3]:
# Load the rpy2 IPython extension; later cells in this notebook use the %%R cell magic.
%load_ext rpy2.ipython

In [4]:
%%R
# Attach the R packages used by the R cells of this notebook.
# Per the cell output, attaching dplyr masks stats::filter/lag and the base
# set functions (intersect, setdiff, setequal, union), and a required
# package, grid, is loaded as a dependency.
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: grid


# Simulating fragments

### OLD: nestly pipeline log

In [None]:
%%writefile $bashFile
#!/bin/bash
#
# SIPSim pipeline template. Stages, in order: fragments, fragment KDE,
# diffusion, community file, incorporation config, isotope incorporation,
# BD shift, gradient fractions, OTU table, subsampling, wide table, sample
# metadata, then the R analysis scripts.
#
# Tokens wrapped in curly braces are placeholders, not shell syntax. They are
# presumably substituted by nestly before the script is run (TODO confirm).
# Placeholders used: genome_index, genome_dir, primers, np, fileName,
# percTaxa, percIncorp, abs, subsample, R_dir, topTaxaToPlot, BD_min,
# BD_max, padj, log2neg.
# Keep literal curly braces out of added comments and code: if substitution
# uses Python string formatting (assumption), stray braces would break it.
#
# NOTE(review): $bashFile on the writefile line is not defined in the cells
# shown before this one; it must be set before this cell is executed.
# NOTE(review): there is no `set -e`, so a failing step does not stop the
# steps after it.

# simulating fragments
# stderr goes to <fileName>.log, stdout (the fragments) to <fileName>.pkl
SIPSim fragments \
    {genome_index} \
    --fp {genome_dir} \
    --fr {primers} \
    --fld skewed-normal,9000,2500,-5 \
    --flr None,None \
    --nf 10000 \
    --np {np} \
    2> {fileName}.log \
    > {fileName}.pkl
    
# converting to kde object
# reads <fileName>.pkl, writes <fileName>_kde.pkl
SIPSim fragment_kde \
    {fileName}.pkl \
    > {fileName}_kde.pkl
    
# adding diffusion
# reads <fileName>_kde.pkl, writes <fileName>_kde_dif.pkl
SIPSim diffusion \
    {fileName}_kde.pkl \
    --np {np} \
    > {fileName}_kde_dif.pkl
    
# creating a community file
# requests 2 communities (--n_comm 2); written to comm.txt
SIPSim gradientComms \
    {genome_index} \
    --n_comm 2 \
    > comm.txt

# making incorp file
# config file is named <percTaxa>_<percIncorp>.config
SIPSim incorpConfigExample \
  --percTaxa {percTaxa} \
  --percIncorpUnif {percIncorp} \
  > {percTaxa}_{percIncorp}.config

# adding isotope incorporation to BD distribution
# inputs: diffusion KDEs, the incorp config, and comm.txt
SIPSim isoIncorp \
    {fileName}_kde_dif.pkl \
    {percTaxa}_{percIncorp}.config \
    --comm comm.txt \
    --np {np} \
    > {fileName}_kde_dif_incorp.pkl
 
# calculating BD shift from isotope incorporation
# takes the pre-incorporation and post-incorporation KDE files
SIPSim BD_shift \
    {fileName}_kde_dif.pkl \
    {fileName}_kde_dif_incorp.pkl \
    --np {np} \
    > {fileName}_kde_dif_incorp_BD-shift.txt
       
# simulating gradient fractions
# reads comm.txt, writes fracs.txt
SIPSim fractions \
    comm.txt \
    > fracs.txt

# simulating an OTU table
# inputs: incorporation KDEs, comm.txt, fracs.txt; output OTU_abs<abs>.txt
SIPSim OTU_table \
    {fileName}_kde_dif_incorp.pkl \
    comm.txt \
    fracs.txt \
    --abs {abs} \
    --np {np} \
    > OTU_abs{abs}.txt
    
# subsampling from the OTU table (simulating sequencing of the DNA pool)
# low and high are both set to the same subsample value
# the n2 in the output name presumably refers to --n_comm 2 (TODO confirm)
SIPSim OTU_subsample \
    --dist_params low:{subsample},high:{subsample} \
    OTU_abs{abs}.txt \
    > OTU_n2_abs{abs}_sub{subsample}.txt

# making a wide table
# -w selects the wide form; output gets the _w suffix
SIPSim OTU_wideLong -w \
    OTU_n2_abs{abs}_sub{subsample}.txt \
    > OTU_n2_abs{abs}_sub{subsample}_w.txt
    
# making metadata (phyloseq: sample_data)
# output gets the _meta suffix
SIPSim OTU_sampleData \
    OTU_n2_abs{abs}_sub{subsample}.txt \
    > OTU_n2_abs{abs}_sub{subsample}_meta.txt
  
#-- R analysis --#
# prepend the R script directory to PATH so the .r scripts below can be
# called by bare name
export PATH={R_dir}:$PATH
# plotting taxon abundances
# -o gives an output name without extension; -r is passed topTaxaToPlot
OTU_taxonAbund.r \
    OTU_n2_abs{abs}_sub{subsample}.txt \
    -r {topTaxaToPlot} \
    -o OTU_n2_abs{abs}_sub{subsample} 
# running DeSeq2 and making confusion matrix on predicting incorporators
## making phyloseq object from OTU table
## inputs: the wide OTU table and (-s) the sample metadata
phyloseq_make.r \
    OTU_n2_abs{abs}_sub{subsample}_w.txt \
    -s OTU_n2_abs{abs}_sub{subsample}_meta.txt \
    > OTU_n2_abs{abs}_sub{subsample}.physeq
## filtering phyloseq object to just taxa/samples of interest
## the filter is given a BD_min / BD_max window
phyloseq_edit.r \
    OTU_n2_abs{abs}_sub{subsample}.physeq \
    --BD_min {BD_min} --BD_max {BD_max} \
    > OTU_n2_abs{abs}_sub{subsample}_filt.physeq
## making ordination
## second positional argument is the PDF path (no stdout redirect here)
phyloseq_ordination.r \
    OTU_n2_abs{abs}_sub{subsample}_filt.physeq \
    OTU_n2_abs{abs}_sub{subsample}_bray-NMDS.pdf
## DESeq2
## runs on the filtered phyloseq object; stdout saved with the _DESeq2 suffix
phyloseq_DESeq2.r \
    OTU_n2_abs{abs}_sub{subsample}_filt.physeq \
    > OTU_n2_abs{abs}_sub{subsample}_DESeq2
## Confusion matrix
## inputs: the BD-shift table and the DESeq2 output; stdout is not
## redirected, so any output files are chosen by the script itself
DESeq2_confuseMtx.r \
    {fileName}_kde_dif_incorp_BD-shift.txt \
    OTU_n2_abs{abs}_sub{subsample}_DESeq2 \
    --padj {padj} --log2neg {log2neg} 