# Goal

* Demultiplex MiSeq 16S rRNA sequence data for the Georg Animal Microbiome dataset


## Sequencing method summary

* Primers: Caporaso 515F-806R (only forward primer is barcoded)
* Sequencer: MiSeq
* NGS library prep: paired-end 250 bp

## Notes

* There are 2 sets of read files (2 MiSeq runs)
  * These will need to be demultiplexed separately and then combined

# Var

## Files/dirs

In [1]:
import os

# Directory the notebook changes into before running anything; the qiime
# output artifacts are written relative to it.
work_dir = '/ebio/abt3_projects/Georg_animal_feces/data/16S/LLA/'

# reads: one import directory per MiSeq run, each holding the forward reads,
# the reverse reads and the barcode (index) reads under fixed file names
georg_read_dir = '/ebio/abt3_projects/Georg_animal_feces/data/16S/raw_run_data/georg/qiime2_import/'
georg_read1_file, georg_read2_file, georg_index_file = (
    os.path.join(georg_read_dir, fastq_name)
    for fastq_name in ('forward.fastq.gz', 'reverse.fastq.gz', 'barcodes.fastq.gz')
)
tony_read_dir = '/ebio/abt3_projects/Georg_animal_feces/data/16S/raw_run_data/tony/qiime2_import/'
tony_read1_file, tony_read2_file, tony_index_file = (
    os.path.join(tony_read_dir, fastq_name)
    for fastq_name in ('forward.fastq.gz', 'reverse.fastq.gz', 'barcodes.fastq.gz')
)

# mapping files: one per run, passed to the demultiplexing step as the barcode metadata
georg_map_file = '/ebio/abt3_projects/Georg_animal_feces/data/mapping/mapping_from_georg_run/unified_ill_georgtest_map.txt'
tony_map_file = '/ebio/abt3_projects/Georg_animal_feces/data/mapping/mapping_from_tony_run/unified_ill_tony_map.txt'

## SGE params

In [2]:
# Cluster (SGE) job settings plus the conda environment name.
# NOTE(review): in the cells shown here only `conda_env` is actually used;
# the other values are presumably meant for qsub-style submission — confirm.
email = 'nyoungblut@tuebingen.mpg.de'
threads = 20
memory = '3G'
job_time = '4:0:0'  # presumably hours:minutes:seconds — TODO confirm
SGE_out = '~/SGE/LLA_Georg'
# Environment activated (via `source activate`) in front of every qiime command.
conda_env = 'qiime2'

# Init

In [3]:
# Change the kernel's working directory to `work_dir` so the relative
# output paths used by the qiime commands below resolve there.
%cd $work_dir

/ebio/abt3_projects/Georg_animal_feces/data/16S/LLA


# Method

## Importing data

### Georg

In [4]:
# Build the `qiime tools import` call that packs the Georg run's import
# directory into a single EMPPairedEndSequences artifact (georg_seqs.qza).
# The trailing backslashes are string line-continuations, so the command
# ends up on one line.
cmd = """qiime tools import \
  --type EMPPairedEndSequences \
  --input-path {input_path} \
  --output-path georg_seqs.qza
  """.format(input_path=georg_read_dir)

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)
print(job)

bash -c "source activate qiime2; qiime tools import   --type EMPPairedEndSequences   --input-path /ebio/abt3_projects/Georg_animal_feces/data/16S/raw_run_data/georg/qiime2_import/   --output-path georg_seqs.qza
  "


In [5]:
# Hand the string held in `job` (the Georg import command built above) to the system shell.
!$job 

In [6]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that georg_seqs.qza was written.
!ls -thlc

total 6.2G
-rw-r--r-- 1 nyoungblut abt3 6.2G Dec  4 11:24 georg_seqs.qza
drwxr-sr-x 2 nyoungblut abt3  114 Dec  4 11:10 georg
drwxr-sr-x 2 nyoungblut abt3  113 Dec  4 11:10 tony
drwxr-sr-x 2 nyoungblut abt3  153 Dec  1 13:42 phyloseq
drwxr-sr-x 5 nyoungblut abt3  251 Dec  1 12:23 tmp
-rw-r--r-- 1 nyoungblut abt3  567 Nov 30 20:35 rooted_tree.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 30 19:47 mask_align.sh
-rw-r--r-- 1 nyoungblut abt3  541 Nov 30 19:36 align_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  562 Nov 30 19:07 tax_filter.sh
-rw-r--r-- 1 nyoungblut abt3  570 Nov 30 19:03 seq_filter.sh
-rw-r--r-- 1 nyoungblut abt3  767 Nov 30 19:02 export.sh
-rw-r--r-- 1 nyoungblut abt3  501 Nov 30 18:51 view_tax.sh
-rw-r--r-- 1 nyoungblut abt3   75 Nov 30 18:12 screenlog.0
-rw-r--r-- 1 nyoungblut abt3  674 Nov 30 16:19 taxonomy.sh
-rw-r--r-- 1 nyoungblut abt3  519 Nov 30 16:07 rep_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 30 13:40 rarefy.sh
-rw-r--r-- 1 nyoungblut abt3  659 Nov 30 1

### Tony

In [7]:
# Build the `qiime tools import` call that packs the Tony run's import
# directory into a single EMPPairedEndSequences artifact (tony_seqs.qza).
# The trailing backslashes are string line-continuations, so the command
# ends up on one line.
cmd = """qiime tools import \
  --type EMPPairedEndSequences \
  --input-path {input_path} \
  --output-path tony_seqs.qza
  """.format(input_path=tony_read_dir)

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)
print(job)

bash -c "source activate qiime2; qiime tools import   --type EMPPairedEndSequences   --input-path /ebio/abt3_projects/Georg_animal_feces/data/16S/raw_run_data/tony/qiime2_import/   --output-path tony_seqs.qza
  "


In [8]:
# Hand the string held in `job` (the Tony import command built above) to the system shell.
!$job 

In [9]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that tony_seqs.qza was written.
!ls -thlc

total 13G
-rw-r--r-- 1 nyoungblut abt3 6.9G Dec  4 11:37 tony_seqs.qza
-rw-r--r-- 1 nyoungblut abt3 6.2G Dec  4 11:24 georg_seqs.qza
drwxr-sr-x 2 nyoungblut abt3  114 Dec  4 11:10 georg
drwxr-sr-x 2 nyoungblut abt3  113 Dec  4 11:10 tony
drwxr-sr-x 2 nyoungblut abt3  153 Dec  1 13:42 phyloseq
drwxr-sr-x 5 nyoungblut abt3  251 Dec  1 12:23 tmp
-rw-r--r-- 1 nyoungblut abt3  567 Nov 30 20:35 rooted_tree.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 30 19:47 mask_align.sh
-rw-r--r-- 1 nyoungblut abt3  541 Nov 30 19:36 align_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  562 Nov 30 19:07 tax_filter.sh
-rw-r--r-- 1 nyoungblut abt3  570 Nov 30 19:03 seq_filter.sh
-rw-r--r-- 1 nyoungblut abt3  767 Nov 30 19:02 export.sh
-rw-r--r-- 1 nyoungblut abt3  501 Nov 30 18:51 view_tax.sh
-rw-r--r-- 1 nyoungblut abt3   75 Nov 30 18:12 screenlog.0
-rw-r--r-- 1 nyoungblut abt3  674 Nov 30 16:19 taxonomy.sh
-rw-r--r-- 1 nyoungblut abt3  519 Nov 30 16:07 rep_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 3

## Demultiplex

### Georg

In [10]:
# Build the `qiime demux emp-paired` call for the Georg run: it reads
# georg_seqs.qza, takes the barcodes from the BarcodeSequence column of the
# Georg mapping file and writes the per-sample reads to georg_demux.qza.
# --p-rev-comp-mapping-barcodes asks qiime to reverse-complement the
# mapping-file barcodes before matching.
# The trailing backslashes are string line-continuations, so the command
# ends up on one line.
cmd = """qiime demux emp-paired \
  --i-seqs georg_seqs.qza \
  --m-barcodes-file {metadata} \
  --m-barcodes-category BarcodeSequence \
  --o-per-sample-sequences georg_demux.qza \
  --p-rev-comp-mapping-barcodes
""".format(metadata=georg_map_file)

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)

# Print the wrapped command (`job`), i.e. the exact string that `!$job`
# executes, as the other job-building cells do, instead of only the inner
# qiime call.
print(job)

qiime demux emp-paired   --i-seqs georg_seqs.qza   --m-barcodes-file /ebio/abt3_projects/Georg_animal_feces/data/mapping/mapping_from_georg_run/unified_ill_georgtest_map.txt   --m-barcodes-category BarcodeSequence   --o-per-sample-sequences georg_demux.qza   --p-rev-comp-mapping-barcodes



In [11]:
# Hand the string held in `job` (the Georg demultiplexing command built above) to the system shell.
!$job 

[32mSaved SampleData[PairedEndSequencesWithQuality] to: georg_demux.qza[0m


In [12]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that georg_demux.qza was written.
!ls -thlc

total 17G
-rw-r--r-- 1 nyoungblut abt3 3.8G Dec  4 14:12 georg_demux.qza
-rw-r--r-- 1 nyoungblut abt3 6.9G Dec  4 11:37 tony_seqs.qza
-rw-r--r-- 1 nyoungblut abt3 6.2G Dec  4 11:24 georg_seqs.qza
drwxr-sr-x 2 nyoungblut abt3  114 Dec  4 11:10 georg
drwxr-sr-x 2 nyoungblut abt3  113 Dec  4 11:10 tony
drwxr-sr-x 2 nyoungblut abt3  153 Dec  1 13:42 phyloseq
drwxr-sr-x 5 nyoungblut abt3  251 Dec  1 12:23 tmp
-rw-r--r-- 1 nyoungblut abt3  567 Nov 30 20:35 rooted_tree.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 30 19:47 mask_align.sh
-rw-r--r-- 1 nyoungblut abt3  541 Nov 30 19:36 align_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  562 Nov 30 19:07 tax_filter.sh
-rw-r--r-- 1 nyoungblut abt3  570 Nov 30 19:03 seq_filter.sh
-rw-r--r-- 1 nyoungblut abt3  767 Nov 30 19:02 export.sh
-rw-r--r-- 1 nyoungblut abt3  501 Nov 30 18:51 view_tax.sh
-rw-r--r-- 1 nyoungblut abt3   75 Nov 30 18:12 screenlog.0
-rw-r--r-- 1 nyoungblut abt3  674 Nov 30 16:19 taxonomy.sh
-rw-r--r-- 1 nyoungblut abt3  519 N

#### Summarize data

In [13]:
# Build the `qiime demux summarize` call that turns the demultiplexed Georg
# reads (georg_demux.qza) into a visualization (georg_demux.qzv).
# The template has no placeholders, so it is used as written.
cmd = """qiime demux summarize \
  --i-data georg_demux.qza \
  --o-visualization georg_demux.qzv
"""

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)
print(job)

bash -c "source activate qiime2; qiime demux summarize   --i-data georg_demux.qza   --o-visualization georg_demux.qzv
"


In [14]:
# Hand the string held in `job` (the Georg summarize command built above) to the system shell.
!$job 

[32mSaved Visualization to: georg_demux.qzv[0m


In [15]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that georg_demux.qzv was written.
!ls -thlc

total 17G
-rw-r--r-- 1 nyoungblut abt3 294K Dec  4 14:21 georg_demux.qzv
-rw-r--r-- 1 nyoungblut abt3 3.8G Dec  4 14:12 georg_demux.qza
-rw-r--r-- 1 nyoungblut abt3 6.9G Dec  4 11:37 tony_seqs.qza
-rw-r--r-- 1 nyoungblut abt3 6.2G Dec  4 11:24 georg_seqs.qza
drwxr-sr-x 2 nyoungblut abt3  114 Dec  4 11:10 georg
drwxr-sr-x 2 nyoungblut abt3  113 Dec  4 11:10 tony
drwxr-sr-x 2 nyoungblut abt3  153 Dec  1 13:42 phyloseq
drwxr-sr-x 5 nyoungblut abt3  251 Dec  1 12:23 tmp
-rw-r--r-- 1 nyoungblut abt3  567 Nov 30 20:35 rooted_tree.sh
-rw-r--r-- 1 nyoungblut abt3  535 Nov 30 19:47 mask_align.sh
-rw-r--r-- 1 nyoungblut abt3  541 Nov 30 19:36 align_seqs.sh
-rw-r--r-- 1 nyoungblut abt3  562 Nov 30 19:07 tax_filter.sh
-rw-r--r-- 1 nyoungblut abt3  570 Nov 30 19:03 seq_filter.sh
-rw-r--r-- 1 nyoungblut abt3  767 Nov 30 19:02 export.sh
-rw-r--r-- 1 nyoungblut abt3  501 Nov 30 18:51 view_tax.sh
-rw-r--r-- 1 nyoungblut abt3   75 Nov 30 18:12 screenlog.0
-rw-r--r-- 1 nyoungblut abt3  6

### Tony

In [None]:
# Build the `qiime demux emp-paired` call for the Tony run: it reads
# tony_seqs.qza, takes the barcodes from the BarcodeSequence column of the
# Tony mapping file and writes the per-sample reads to tony_demux.qza.
# --p-rev-comp-mapping-barcodes asks qiime to reverse-complement the
# mapping-file barcodes before matching.
# The trailing backslashes are string line-continuations, so the command
# ends up on one line.
cmd = """qiime demux emp-paired \
  --i-seqs tony_seqs.qza \
  --m-barcodes-file {metadata} \
  --m-barcodes-category BarcodeSequence \
  --o-per-sample-sequences tony_demux.qza \
  --p-rev-comp-mapping-barcodes
""".format(metadata=tony_map_file)

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)
print(job)

bash -c "source activate qiime2; qiime demux emp-paired   --i-seqs tony_seqs.qza   --m-barcodes-file /ebio/abt3_projects/Georg_animal_feces/data/mapping/mapping_from_tony_run/unified_ill_tony_map.txt   --m-barcodes-category BarcodeSequence   --o-per-sample-sequences tony_demux.qza   --p-rev-comp-mapping-barcodes
"


In [None]:
# Hand the string held in `job` (the Tony demultiplexing command built above) to the system shell.
!$job 

In [None]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that tony_demux.qza was written.
!ls -thlc

#### Summarize data

In [None]:
# Build the `qiime demux summarize` call that turns the demultiplexed Tony
# reads (tony_demux.qza) into a visualization (tony_demux.qzv).
# The template has no placeholders, so it is used as written.
cmd = """qiime demux summarize \
  --i-data tony_demux.qza \
  --o-visualization tony_demux.qzv
"""

# Wrap the call so it runs in a bash shell with the conda environment active.
job = 'bash -c "source activate {}; {}"'.format(conda_env, cmd)
print(job)

In [None]:
# Hand the string held in `job` (the Tony summarize command built above) to the system shell.
!$job 

In [None]:
# List the working directory (long format, human-readable sizes, newest
# status-change time first) to check that tony_demux.qzv was written.
!ls -thlc

# SessionInfo

In [25]:
import IPython
# Record the environment this notebook ran in: IPython's own summary of its
# version, install path, Python executable/version and platform.
print(IPython.sys_info())

{'commit_hash': 'ca5443062',
 'commit_source': 'installation',
 'default_encoding': 'UTF-8',
 'ipython_path': '/ebio/abt3_projects/software/miniconda3/envs/qiime2/lib/python3.5/site-packages/IPython',
 'ipython_version': '6.2.1',
 'os_name': 'posix',
 'platform': 'Linux-4.4.67-x86_64-with-debian-stretch-sid',
 'sys_executable': '/ebio/abt3_projects/software/miniconda3/envs/qiime2/bin/python',
 'sys_platform': 'linux',
 'sys_version': '3.5.4 | packaged by conda-forge | (default, Aug 10 2017, '
                '01:38:41) \n'
                '[GCC 4.8.2 20140120 (Red Hat 4.8.2-15)]'}
