# Viewing & validating the mapping file

## File location

In [1]:
# Long, time-sorted listing (human-readable sizes) of the tutorial data folder.
%ls -thlc ../../data/Qiime_Introduction_Tutorial/

total 1.7M
drwxr-xr-x 2 nyoungblut ebio  132 Sep 14 20:46 split_libraries/
-rw-r--r-- 1 nyoungblut ebio 118K Sep 14 20:46 rep_set.tre
-rw-r--r-- 1 nyoungblut ebio 542K Sep 14 20:46 rep_set.fna
-rw-r--r-- 1 nyoungblut ebio 1.1M Sep 14 20:46 otu_table.biom
-rw-r--r-- 1 nyoungblut ebio 2.9K Sep 14 20:46 qiime_processing_workflow_local.sh
-rw------- 1 nyoungblut ebio 3.0K Sep 14 20:46 mapping_file.txt
-rw-r--r-- 1 nyoungblut ebio  108 Sep 14 20:46 16s_pickotu_param.txt


In [2]:
# Switch the kernel's working directory to the tutorial data folder so that
# later cells can refer to files such as mapping_file.txt by bare name.
# NOTE: the path is relative, so running this cell a second time resolves it
# against the new directory rather than the notebook's original location.
%cd ../../data/Qiime_Introduction_Tutorial/

/ebio/abt3_projects/software/tmp/data_analysis_workshops/data/Qiime_Introduction_Tutorial


In [3]:
# Same long listing, now of the current working directory (no path argument).
%ls -thlc

total 1.7M
drwxr-xr-x 2 nyoungblut ebio  132 Sep 14 20:46 split_libraries/
-rw-r--r-- 1 nyoungblut ebio 118K Sep 14 20:46 rep_set.tre
-rw-r--r-- 1 nyoungblut ebio 542K Sep 14 20:46 rep_set.fna
-rw-r--r-- 1 nyoungblut ebio 1.1M Sep 14 20:46 otu_table.biom
-rw-r--r-- 1 nyoungblut ebio 2.9K Sep 14 20:46 qiime_processing_workflow_local.sh
-rw------- 1 nyoungblut ebio 3.0K Sep 14 20:46 mapping_file.txt
-rw-r--r-- 1 nyoungblut ebio  108 Sep 14 20:46 16s_pickotu_param.txt


## Loading file with pandas

In [4]:
import pandas as pd

In [5]:
# Parse the tab-separated mapping file found in the current working directory.
# Being the cell's final expression, the resulting table is shown as its output.
pd.read_csv(
    filepath_or_buffer='mapping_file.txt',
    delimiter='\t',
)

Unnamed: 0,#SampleID,BarcodeSequence,LinkerPrimerSequence,SampleType,Year,Month,Day,Subject,ReportedAntibioticUsage,DaysSinceExperimentStart,Description
0,L1S8,AGCTGACTAGTC,GTGCCAGCMGCCGCGGTAA,gut,2008,10,28,1,Yes,0,1_Fece_10_28_2008
1,L1S140,ATGGCAGCTCTA,GTGCCAGCMGCCGCGGTAA,gut,2008,10,28,2,Yes,0,2_Fece_10_28_2008
2,L1S57,ACACACTATGGC,GTGCCAGCMGCCGCGGTAA,gut,2009,1,20,1,No,84,1_Fece_1_20_2009
3,L1S208,CTGAGATACGCG,GTGCCAGCMGCCGCGGTAA,gut,2009,1,20,2,No,84,2_Fece_1_20_2009
4,L1S76,ACTACGTGTGGT,GTGCCAGCMGCCGCGGTAA,gut,2009,2,17,1,No,112,1_Fece_2_17_2009
5,L1S105,AGTGCGATGCGT,GTGCCAGCMGCCGCGGTAA,gut,2009,3,17,1,No,140,1_Fece_3_17_2009
6,L1S257,CCGACTGAGATG,GTGCCAGCMGCCGCGGTAA,gut,2009,3,17,2,No,140,2_Fece_3_17_2009
7,L1S281,CCTCTCGTGATC,GTGCCAGCMGCCGCGGTAA,gut,2009,4,14,2,No,168,2_Fece_4_14_2009
8,L2S240,CATATCGCAGTT,GTGCCAGCMGCCGCGGTAA,left palm,2008,10,28,2,Yes,0,2_L_Palm_10_28_2008
9,L2S155,ACGATGCGACCA,GTGCCAGCMGCCGCGGTAA,left palm,2009,1,20,1,No,84,1_L_Palm_1_20_2009


## Validating mapping file

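> Note: `%%bash` cells use the default `python`, which is Python 3 here, but QIIME runs only under Python 2, so the QIIME scripts are not available until a Python 2 conda environment is activated.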

In [6]:
%%bash
# Ask the QIIME validator for its help text from the default shell environment.
# The recorded output shows that this fails with "command not found".
validate_mapping_file.py -h

bash: line 1: validate_mapping_file.py: command not found


In [7]:
%%bash
# Report which interpreter version `python` resolves to in this shell (2 or 3?).
python -V

Python 3.6.1 :: Continuum Analytics, Inc.


In [None]:
%%bash
# Activate the `py2` conda environment first: in the default environment
# (see the earlier help cell) validate_mapping_file.py is reported as
# "command not found".
source activate py2
# Print the validator's usage/help text.
validate_mapping_file.py -h

Usage: validate_mapping_file.py [options] {-m/--mapping_fp MAPPING_FP}

[] indicates optional input (order unimportant)
{} indicates required input (order unimportant)

Specifically, we check that:

    - The BarcodeSequence, LinkerPrimerSequences, and ReversePrimer fields
       have valid IUPAC DNA characters, and BarcodeSequence characters
       are non-degenerate (error)
    - The SampleID, BarcodeSequence, LinkerPrimerSequence, and Description
       headers are present. (error)
    - There are not duplicate header fields (error)
    - There are not duplicate barcodes (error)
    - Barcodes are of the same length.  Suppressed when
    - The headers do not contain invalid characters (alphanumeric and
    - The data fields do not contain invalid characters (alphanumeric,
    - SampleID fields are MIENS compliant (only alphanumeric
    - There are no duplicates when the primer and variable length
       barcodes are appended (error)
    - There are no duplicates when barcodes and ad

### Validating (actual call)

In [None]:
%%bash
# Run QIIME's mapping-file validator from inside the py2 conda environment.
# --mapping_fp is the long spelling of -m; results are written under validate_map.
source activate py2
validate_mapping_file.py --mapping_fp mapping_file.txt -o validate_map

In [None]:
# Inspect what the validation run left behind in its output folder.
%ls -thlc validate_map

### View HTML output

In [None]:
# Render the HTML report found under validate_map/ inline in the notebook.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated (since IPython 7.14).
from IPython.display import display, HTML

# Read the whole report; the `with` block closes the file handle afterwards.
with open('validate_map/mapping_file.html') as f:
    output = f.read()
# Wrap the markup in HTML so it is rendered rather than printed as raw text.
display(HTML(output))