Skip to content

Commit

Permalink
Implements #11, #28, #29.
Browse files Browse the repository at this point in the history
  • Loading branch information
lentendu committed Jun 12, 2018
1 parent 5c0c344 commit a050835
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 114 deletions.
17 changes: 10 additions & 7 deletions bin/make_doc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,14 @@ then
echo "The read count was randomly normalized to $SUBSIZE reads in each samples."
echo ""
fi
if [[ $DB == *"silva"* ]] && [ $TECH == "454" ]
if [ $PRECLUST == "mothur" ]
then
PRECL=`for i in log/trim.*.out; do grep -B 1 "^pre.cluster removed" $i | grep -o '[0-9]\+' | paste - - | awk '{print $2/$1*100}' ; done | awk '{m+=$1}END{print m/NR}'`
echo "Dereplicated sequences of each sample were aligned against the reference alignment of the database $DB (version ${VERSION[DB]}, ${CITATION[DB]}) and pre-clustered following the 454 SOP (${CITATION[SOP]})."
echo "Dereplicated sequences of each sample were aligned against the reference alignment of the database $DB (version ${VERSION[DB]}, ${CITATION[DB]}) and pre-clustered following the mothur $TARG SOP (${CITATION[SOP]})."
echo "Approximately 5% of the reads with bad alignment were discarded after the alignment and pre-clustering reduced the number of sequences to analyse by an average of $PRECL %."
echo ""
CIT+=(SOP)
elif [ $TECH == "Illumina" ]
elif [ $PRECLUST == "cdhit454" ]
then
PRECL=`for i in log/trim.*.out; do grep "^ *[0-9].*clusters$" $i ; done | awk '{mean+=$3/$1*100}END{print mean/NR}'`
echo "The reads were pre-clustered in order to merge reads likely arising from sequencing errors (${CITATION[PRECL]}) and thus reducing the computational load."
Expand All @@ -171,16 +171,19 @@ then
echo ""
CIT+=(PRECL PRECDHIT)
fi
CHIMMEAN=$(grep "Removed [0-9]* sequences from your name file" log/trim.[0-9]*.out | cut -d " " -f 2 | awk '{c+=$1}END{printf "%.0d\n", c/NR}')
echo "An average of $CHIMMEAN chimeric reads were detected and removed from each sample using the UCHIME algorithm as implemented in MOTHUR (${CITATION[UCHIME]})."
CIT+=(UCHIME)
echo ""

if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
then
ITSXMEAN=$(while read num samp; do NAMES=$(tac log/trim.$num.out | sed -n "1,/unique\.seqs/{s/^.*name=\([^)]*\))$/processing\/$samp\/\1/p}") ; echo $(sed -n '$=' $NAMES) $(sed -n '$=' ${NAMES/itsx\.$ITSX\./}) ; done < <(awk '{print NR,$1}' config/lib4.list) | awk '{sum+=$1/$2*100}END{print sum/NR}')
echo "The $ITSX fragment was detected and extracted from $ITSXMEAN % of chimera free reads using ITSx (version ${VERSION[ITSX]}, ${CITATION[ITSX]}). "
CIT+=(ITSX)
echo ""
elif [ $CHIMERA == "before" ] || [ $CHIMERA == "both" ]
then
CHIMMEAN=$(grep "Removed [0-9]* sequences from your name file" log/trim.[0-9]*.out | cut -d " " -f 2 | awk '{c+=$1}END{printf "%.0d\n", c/NR}')
echo "An average of $CHIMMEAN chimeric reads were detected and removed from each sample using the UCHIME algorithm as implemented in MOTHUR (${CITATION[UCHIME]})."
CIT+=(UCHIME)
echo ""
fi

while read var val; do unset $var ; if [ $REF_SUBPROJECT == "no" ] ; then declare $var="$val" ; else declare $var="${val//$REF_SUBPROJECT/$SUBPROJECT}" ; fi ; done < config/merge_env.txt
Expand Down
2 changes: 2 additions & 0 deletions bin/variables_definition.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ MINQUAL 20 Minimum average quality on the trimmed sequence length ^[0-9]+$
MIN_DEPTH 2000 Minimum number of trimmed reads per sample ^[0-9]+$
ITSX no ITSx region to extract ^ITS[12]$
SUBSAMPLE no Subsampling no yes
PRECLUST cdhit454 Pre-clustering no cdhit454 mothur
CHIMERA after Chimera removal before after both
CLUST mcl Clustering algorithm mcl sumaclust cd-hit-est vsearch swarm
TRESH 97 Clustering similarity threshold ^[0-9]+(.[0-9]+)?$
PREV_PATH no Cluster with previous subproject reference sequences no ^/.+(/.+)*$
Expand Down
46 changes: 24 additions & 22 deletions src/OTU.step
Original file line number Diff line number Diff line change
Expand Up @@ -159,34 +159,36 @@ fi


# Chimera recheck
NSLOTS2=$(( $NCPUS / 2 ))
mothur "#chimera.uchime(fasta=$FASTA_OTUS.fasta, name=$NAMES_OTUS.names, group=$GROUP.groups, reference=self, processors=$NSLOTS2);
get.current()"
unset LOG ACCNOS
LOG=`ls mothur.*.logfile | tail -1`
ACCNOS=`sed -n '/get.current()/,$p' $LOG | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`

if [ -s $ACCNOS.accnos ]
if [ $CHIMERA == "after" ] || [ $CHIMERA == "both" ]
then
mothur "#remove.seqs(accnos=$ACCNOS.accnos, name=$NAMES_OTUS.names, fasta=$FASTA_OTUS.fasta);
NSLOTS2=$(( $NCPUS / 2 ))
mothur "#chimera.uchime(fasta=$FASTA_OTUS.fasta, name=$NAMES_OTUS.names, group=$GROUP.groups, reference=self, processors=$NSLOTS2);
get.current()"
unset LOG
unset LOG ACCNOS
LOG=`ls mothur.*.logfile | tail -1`
NAMES_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
FASTA_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
ACCNOS=`sed -n '/get.current()/,$p' $LOG | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`

mothur "#list.seqs(name=$NAMES_OTUS.names);
get.seqs(accnos=current, fasta=$FASTA.fasta, list=$LIST.list, name=$NAMES.names, group=$GROUP.groups);
get.current()"
unset LOG FASTA NAMES GROUP LIST
LOG=`ls mothur.*.logfile | tail -1`
FASTA=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
GROUP=`sed -n '/get.current()/,$p' $LOG | grep "^group=" | sed 's/group=//;s/\.groups//'`
LIST=`sed -n '/get.current()/,$p' $LOG | grep "^list=" | sed 's/list=//;s/\.list//'`
if [ -s $ACCNOS.accnos ]
then
mothur "#remove.seqs(accnos=$ACCNOS.accnos, name=$NAMES_OTUS.names, fasta=$FASTA_OTUS.fasta);
get.current()"
unset LOG
LOG=`ls mothur.*.logfile | tail -1`
NAMES_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
FASTA_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`

mothur "#list.seqs(name=$NAMES_OTUS.names);
get.seqs(accnos=current, fasta=$FASTA.fasta, list=$LIST.list, name=$NAMES.names, group=$GROUP.groups);
get.current()"
unset LOG FASTA NAMES GROUP LIST
LOG=`ls mothur.*.logfile | tail -1`
FASTA=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
GROUP=`sed -n '/get.current()/,$p' $LOG | grep "^group=" | sed 's/group=//;s/\.groups//'`
LIST=`sed -n '/get.current()/,$p' $LOG | grep "^list=" | sed 's/list=//;s/\.list//'`
fi
fi


# Save newly set and updated variables
cd ..
comm -23 <(set -o posix; set | sort) <(sort config/OTU.set) | tr "=" "\t" | grep -v "^[a-z]" | grep -P -v "^PWD\t" | sed "s/'//g" > config/OTU_env.txt
Expand Down
81 changes: 53 additions & 28 deletions src/deltamp.main
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,56 @@ else
exit 1
fi


# Databases
if [ $TARG == "ITS" ]
then
if [ $CUT_DB == "yes" ]
then
DBCUT=$DB.$ITSX
fi
if [ $ASSIGN_FUNCT == "yes" ]
then
CITATION[FUN]="Nguyen et al., 2016"
FULLCITATION[FUN]="Nguyen, N. H., Song, Z., Bates, S. T., Branco, S., Tedersoo, L., Menke, J., … Kennedy, P. G. (2016). FUNGuild: An open annotation tool for parsing fungal community datasets by ecological guild. Fungal Ecology, 20, 241–248."
VERSION[FUN]=1.1
fi
fi

if ( [ $TARG == "18S" ] || [ $TARG == "16S" ] ) && [ $CUT_DB == "yes" ]
then
if [ $TECH == "454" ] && [ $FLIP == "true" ]
then
DBCUT=$RVS_NAME.$FWD_NAME.$DB
else
DBCUT=$FWD_NAME.$RVS_NAME.$DB
fi
fi

DBALIGN=$DB.align
if [ -f $DBFOLD/$DBALIGN.fasta ] && [ $CUT_DB == "yes" ]
then
DBCHOP=$DBCUT.align
fi

# Pre-clustering
if [ $CLUST == "swarm" ] && [ $PRECLUST != "no" ]
then
echo "# Configuration file error: pre-clustering is not allowed for swarm based OTU clustering. Turn off the 'Pre-clustering' argument with 'no' or use another clustering algorithm." >&2
show_doc | fmt -s -w $(tput cols) >&2
exit 1
fi

if [ $PRECLUST == "mothur" ]
then
if [ ! -f $DBFOLD/$DBALIGN.fasta ]
then
echo "# Configuration file error: the mothur pre-clustering method could only be used if an aligned version of the reference database is available." >&2
show_doc | fmt -s -w $(tput cols) >&2
exit 1
fi
fi

# Dependencies
# modules
# gnu parallel
Expand Down Expand Up @@ -294,6 +344,9 @@ then

elif [ $TECH == "Illumina" ]
then
# MiSeq SOP
CITATION[SOP]="Kozich et al., 2013"
FULLCITATION[SOP]="Kozich, J. J., Westcott, S. L., Baxter, N. T., Highlander, S. K., & Schloss, P. D. (2013). Development of a dual-index sequencing strategy and curation pipeline for analyzing amplicon sequence data on the MiSeq Illumina sequencing platform. Applied and Environmental Microbiology, AEM.01043-13. doi:10.1128/AEM.01043-13"
# PANDAseq
CITATION[PANDA]="Masella et al., 2012"
FULLCITATION[PANDA]="Masella AP, Bartram AK, Truszkowski JM, Brown DG, Neufeld JD (2012) PANDAseq: paired-end assembler for illumina sequences. BMC Bioinformatics, 13, 31."
Expand Down Expand Up @@ -400,34 +453,6 @@ then
paste <(cut -f 1 config/lib3.list) <(cut -f 1 config/lib2.list) | sort -k 1,1 | awk '{if(NR==1){S=$1;printf "%s %s",S,$2} else {if($1==S){printf " %s",$2} else {S=$1;printf "\n%s %s",S,$2}}}END{printf "\n"}' > config/lib4.list
fi
SAMP_SIZE=`sed -n '$=' config/lib4.list`

# Databases
if [ $TARG == "ITS" ]
then
if [ $CUT_DB == "yes" ]
then
DBCUT=$DB.$ITSX
fi
CITATION[FUN]="Nguyen et al., 2016"
FULLCITATION[FUN]="Nguyen, N. H., Song, Z., Bates, S. T., Branco, S., Tedersoo, L., Menke, J., … Kennedy, P. G. (2016). FUNGuild: An open annotation tool for parsing fungal community datasets by ecological guild. Fungal Ecology, 20, 241–248."
VERSION[FUN]=1.1
fi

if ( [ $TARG == "18S" ] || [ $TARG == "16S" ] ) && [ $CUT_DB == "yes" ]
then
if [ $TECH == "454" ] && [ $FLIP == "true" ]
then
DBCUT=$RVS_NAME.$FWD_NAME.$DB
else
DBCUT=$FWD_NAME.$RVS_NAME.$DB
fi
fi

DBALIGN=$DB.align
if [ -f $DBFOLD/$DBALIGN.fasta ] && [ $CUT_DB == "yes" ]
then
DBCHOP=$DBCUT.align
fi

# Save newly set variables to file (avoiding lowercase variables from loops):
join -v 1 <(set -o posix; set | tr "=" "\t" | sort -k 1,1 ) <(cat /tmp/amp.$STIME.set | tr "=" "\t" | sort -k 1,1 ) | grep -v "^[a-z]\|BASH_REMATCH" | sed "s/'//g" > config/env.txt
Expand Down
105 changes: 48 additions & 57 deletions src/trim.step
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module load DeltaMP/${VERSION[DELTAMP]}
LIB=(`sed -n "$ARRAY_TASK"'p' config/lib4.list`)
SAMP=${LIB[0]}
LIB_NAME=(`echo ${LIB[@]} | cut -d " " -f 2-`)
LOG=mothur.${TECH}_trim.$SAMP.logfile
cd processing && mkdir $SAMP
cd $SAMP

Expand Down Expand Up @@ -79,19 +80,19 @@ do
else
if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
mothur "#set.logfile(name=$LOG, append=T);
set.dir(input=../../libraries/fasta, output=./);
trim.seqs(fasta=$i.fasta, oligos=oligos.$i, qfile=$i.qual, flip=$FLIP, qaverage=$QUAL, bdiffs=$BDIFFS, pdiffs=$PDIFFS, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$LENGTH)"
else
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
mothur "#set.logfile(name=$LOG, append=T);
set.dir(input=../../libraries/fasta, output=./);
trim.seqs(fasta=$i.fasta, oligos=oligos.$i, qfile=$i.qual, flip=$FLIP, qaverage=$QUAL, bdiffs=$BDIFFS, pdiffs=$PDIFFS, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$LENGTH, keepfirst=$LENGTH)"
fi
fi
elif [ $TECH == "Illumina" ]
then
QUAL=`cat $EXEC/quality_check/optimized.quality.txt`
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
mothur "#set.logfile(name=$LOG, append=T);
set.dir(input=../../libraries/fasta, output=./);
trim.seqs(fasta=$i.pairend.fasta, qfile=$i.pairend.qual, qaverage=$QUAL, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$MINLEN, maxlength=$MAXLEN)"
fi
Expand All @@ -106,85 +107,88 @@ then
SIZE=$(grep "Minimum" ../../quality_check/$SUBPROJECT.summary.stat.tsv | cut -f 3 | cut -d "." -f 1)
if [ $TECH == "454" ] && [ "$DENOISE" == "yes" ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
mothur "#set.logfile(name=$LOG, append=T);
unique.seqs(fasta=$SAMP.trim.fasta, name=$SAMP.names);
sub.sample(fasta=current, name=current, size=$SIZE);
get.current()"
else
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
mothur "#set.logfile(name=$LOG, append=T);
unique.seqs(fasta=$SAMP.trim.fasta);
sub.sample(fasta=current, name=current, size=$SIZE);
get.current()"
fi
else
if [ $TECH == "454" ] && [ "$DENOISE" == "yes" ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
mothur "#set.logfile(name=$LOG, append=T);
unique.seqs(fasta=$SAMP.trim.fasta, name=$SAMP.names);
get.current()"
else
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
mothur "#set.logfile(name=$LOG, append=T);
unique.seqs(fasta=$SAMP.trim.fasta);
get.current()"
fi
fi

#store current files in variables
unset LOG FASTA NAMES
LOG=mothur.${TECH}_trim.$SAMP.logfile
unset FASTA NAMES
FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`

# align to reference (silva DB only), pre-cluster (silva DB and/or Illumina), and chimera removal
if [ ! -z $DBCHOP ] && [ $TECH == "454" ]
# Precluster
if [ $PRECLUST == "mothur" ]
then
PRECDIFF=$(( ${LENGTH} / 100 ))
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
set.dir(tempdefault=/data/ecogen/databases/mothur);
align.seqs(candidate=$FASTA.fasta, template=$DBCHOP.fasta, align=needleman);
screen.seqs(fasta=current, name=$NAMES.names, optimize=start-end, criteria=95);
filter.seqs(fasta=current, vertical=T, trump=.);
pre.cluster(fasta=current, name=current, diffs=$PRECDIFF);
chimera.uchime(fasta=current, name=current, reference=self);
get.current()"
unset FASTA NAMES ACCNOS
FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
if [ -s $ACCNOS.accnos ]
# pre-cluster at max 1% after aligning to reference and removing the 5 % worse aligned reads
if [ $TECH == "454" ] && [ $ITSX == "no" ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
PRECDIFF=$(( ${LENGTH} / 100 ))
else
PRECDIFF=$(twofasta $FASTA.fasta | awk -v M=$MAXLEN 'BEGIN{min=M}length($1)<M{min=length($1)}END{print int(min/100)'awk '$1!~"^>"{sum+=length($1)}END{print int(sum/(FNR/2)/100)}')
fi
if [ $PRECDIFF -ge 1 ]
then
mothur "#set.logfile(name=$LOG, append=T);
set.dir(tempdefault=/data/ecogen/databases/mothur);
align.seqs(candidate=$FASTA.fasta, template=$DBCHOP.fasta, align=needleman);
screen.seqs(fasta=current, name=$NAMES.names, optimize=start-end, criteria=95);
filter.seqs(fasta=current, vertical=T, trump=.);
pre.cluster(fasta=current, name=current, diffs=$PRECDIFF);
get.current()"
unset FASTA NAMES
FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
else
echo "The pre-culstering step was skipped because the smallest sequence have less than 100nt."
fi
elif [ $TECH == "Illumina" ]
elif [ $PRECLUST == "cdhit454" ]
then
# pre-cluster with cd-hit-454, allowing 1% dissimilarity with indel of size 1 at max
# pre-cluster with cd-hit-454, allowing 1% dissimilarity with indel of max 1 nt long
cdhit4542mothur () {
bak=$1 ; fasta=$2 ; names=$3
nl $bak | sed 's/>//;s/\.\.\.//' | awk '{print $4,$1,$2}' | sort -k 1,1 | join - <(sort -k 1,1 $names) | awk '{size=split($4,a,",");print $0,size}' | sort -k 3,3n -k 5,5nr | awk '{if(NR==1){nb=$3;printf "%s\t%s\t%s", $1,$2,$4} else {if($3==nb){printf ",%s", $4} else {nb=$3;printf "\n%s\t%s\t%s",$1,$2,$4}}}END{printf "\n"}' | sort -k 2,2n | cut -f 1,3 > ${names%.*}.precl.names
obigrep --uppercase --id-list=<(cut -f 1 ${names%.*}.precl.names) $fasta| twofasta > ${fasta%.*}.precl.fasta
echo "cdhit4542mothur output files:#${fasta%.*}.precl.fasta#${names%.*}.precl.names" | tr "#" "\n"
obigrep --uppercase --id-list=<(cut -f 1 ${names%.*}.precl.names) $fasta | twofasta > ${fasta%.*}.precl.fasta
}
cd-hit-454 -B 1 -g 1 -M 6000 -c 0.99 -bak 1 -i $FASTA.fasta -o $FASTA.precl.fasta.temp
cdhit4542mothur $FASTA.precl.fasta.temp.bak.clstr $FASTA.fasta $NAMES.names
rm $FASTA.precl.fasta.temp*
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
chimera.uchime(fasta=$FASTA.precl.fasta, name=$NAMES.precl.names, reference=self);
FASTA=$FASTA.precl
NAMES=$NAMES.precl
fi

# [ITSx and] Chimera removal
if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
then
ITSx -i $FASTA.fasta --save_regions $ITSX --preserve T --graphical F -o $FASTA.itsx
awk '$0~"^>"{sub(">","");print $1}' $FASTA.itsx.$ITSX.fasta | sort | join - <(sort -k 1,1 $NAMES.names) > $NAMES.itsx.$ITSX.names
mothur "#set.logfile(name=$LOG, append=T);
unique.seqs(fasta=$FASTA.itsx.$ITSX.fasta, name=$NAMES.itsx.$ITSX.names);
get.current()"
unset FASTA NAMES ACCNOS
unset FASTA NAMES
FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
if [ -s $ACCNOS.accnos ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
get.current()"
fi
else
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
elif [ $CHIMERA == "before" ] || [ $CHIMERA == "both" ]
then
mothur "#set.logfile(name=$LOG, append=T);
chimera.uchime(fasta=$FASTA.fasta, name=$NAMES.names, reference=self);
get.current()"
unset FASTA NAMES ACCNOS
Expand All @@ -193,25 +197,12 @@ else
ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
if [ -s $ACCNOS.accnos ]
then
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
mothur "#set.logfile(name=$LOG, append=T);
remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
get.current()"
fi
fi

# ITSx
if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
then
unset FASTA NAMES
FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
ITSx -i $FASTA.fasta --save_regions $ITSX --preserve T --graphical F -o $FASTA.itsx
awk '$0~"^>"{sub(">","");print $1}' $FASTA.itsx.$ITSX.fasta | sort | join - <(sort -k 1,1 $NAMES.names) > $NAMES.itsx.$ITSX.names
mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
unique.seqs(fasta=$FASTA.itsx.$ITSX.fasta, name=$NAMES.itsx.$ITSX.names);
get.current()"
fi

# list files and directories
. $BIN/list_step_files.sh

Expand Down

0 comments on commit a050835

Please sign in to comment.