Implements #11, #28, #29.

lentendu · Jun 12, 2018 · a050835 · a050835
1 parent 5c0c344
commit a050835
Show file tree

Hide file tree

Showing 5 changed files with 137 additions and 114 deletions.
diff --git a/bin/make_doc.sh b/bin/make_doc.sh
@@ -155,14 +155,14 @@ then
 	echo "The read count was randomly normalized to $SUBSIZE reads in each samples."
 	echo ""
 fi
-if [[ $DB == *"silva"* ]] && [ $TECH == "454" ]
+if [ $PRECLUST == "mothur" ]
 then
 	PRECL=`for i in log/trim.*.out; do grep -B 1 "^pre.cluster removed" $i | grep -o '[0-9]\+' | paste - - | awk '{print $2/$1*100}' ; done | awk '{m+=$1}END{print m/NR}'`
-	echo "Dereplicated sequences of each sample were aligned against the reference alignment of the database $DB (version ${VERSION[DB]}, ${CITATION[DB]}) and pre-clustered following the 454 SOP (${CITATION[SOP]})."
+	echo "Dereplicated sequences of each sample were aligned against the reference alignment of the database $DB (version ${VERSION[DB]}, ${CITATION[DB]}) and pre-clustered following the mothur $TARG SOP (${CITATION[SOP]})."
 	echo "Approximately 5% of the reads with bad alignment were discarded after the alignment and pre-clustering reduced the number of sequences to analyse by an average of $PRECL %."
 	echo ""
 	CIT+=(SOP)
-elif [ $TECH == "Illumina" ]
+elif [ $PRECLUST == "cdhit454" ]
 then
 	PRECL=`for i in log/trim.*.out; do grep "^ *[0-9].*clusters$" $i ; done | awk '{mean+=$3/$1*100}END{print mean/NR}'`
 	echo "The reads were pre-clustered in order to merge reads likely arising from sequencing errors (${CITATION[PRECL]}) and thus reducing the computational load."
@@ -171,16 +171,19 @@ then
 	echo ""
 	CIT+=(PRECL PRECDHIT)
 fi
-CHIMMEAN=$(grep "Removed [0-9]* sequences from your name file" log/trim.[0-9]*.out | cut -d " " -f 2 | awk '{c+=$1}END{printf "%.0d\n", c/NR}')
-echo "An average of $CHIMMEAN chimeric reads were detected and removed from each sample using the UCHIME algorithm as implemented in MOTHUR (${CITATION[UCHIME]})."
-CIT+=(UCHIME)
-echo ""
+
 if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
 then
 	ITSXMEAN=$(while read num samp; do NAMES=$(tac log/trim.$num.out | sed -n "1,/unique\.seqs/{s/^.*name=\([^)]*\))$/processing\/$samp\/\1/p}") ; echo $(sed -n '$=' $NAMES) $(sed -n '$=' ${NAMES/itsx\.$ITSX\./}) ; done < <(awk '{print NR,$1}' config/lib4.list) | awk '{sum+=$1/$2*100}END{print sum/NR}')
 	echo "The $ITSX fragment was detected and extracted from $ITSXMEAN % of chimera free reads using ITSx (version ${VERSION[ITSX]}, ${CITATION[ITSX]}). "
 	CIT+=(ITSX)
 	echo ""
+elif [ $CHIMERA == "before" ] || [ $CHIMERA == "both" ]
+then
+	CHIMMEAN=$(grep "Removed [0-9]* sequences from your name file" log/trim.[0-9]*.out | cut -d " " -f 2 | awk '{c+=$1}END{printf "%.0d\n", c/NR}')
+	echo "An average of $CHIMMEAN chimeric reads were detected and removed from each sample using the UCHIME algorithm as implemented in MOTHUR (${CITATION[UCHIME]})."
+	CIT+=(UCHIME)
+	echo ""
 fi
 
 while read var val; do unset $var ; if [ $REF_SUBPROJECT == "no" ] ; then declare $var="$val" ; else declare $var="${val//$REF_SUBPROJECT/$SUBPROJECT}" ; fi ; done < config/merge_env.txt

diff --git a/bin/variables_definition.tsv b/bin/variables_definition.tsv
@@ -20,6 +20,8 @@ MINQUAL	20	Minimum average quality on the trimmed sequence length	^[0-9]+$
 MIN_DEPTH	2000	Minimum number of trimmed reads per sample	^[0-9]+$
 ITSX	no	ITSx region to extract	^ITS[12]$
 SUBSAMPLE	no	Subsampling	no yes
+PRECLUST	cdhit454	Pre-clustering	no cdhit454 mothur
+CHIMERA	after	Chimera removal 	before after both
 CLUST	mcl	Clustering algorithm	mcl sumaclust cd-hit-est vsearch swarm
 TRESH	97	Clustering similarity threshold	^[0-9]+(.[0-9]+)?$
 PREV_PATH	no	Cluster with previous subproject reference sequences	no ^/.+(/.+)*$

diff --git a/src/OTU.step b/src/OTU.step
@@ -159,34 +159,36 @@ fi
 
 
 # Chimera recheck
-NSLOTS2=$(( $NCPUS / 2 ))
-mothur "#chimera.uchime(fasta=$FASTA_OTUS.fasta, name=$NAMES_OTUS.names, group=$GROUP.groups, reference=self, processors=$NSLOTS2);
-get.current()"
-unset LOG ACCNOS
-LOG=`ls mothur.*.logfile | tail -1`
-ACCNOS=`sed -n '/get.current()/,$p' $LOG | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
-
-if [ -s $ACCNOS.accnos ]
+if [ $CHIMERA == "after" ] || [ $CHIMERA == "both" ]
 then
-	mothur "#remove.seqs(accnos=$ACCNOS.accnos, name=$NAMES_OTUS.names, fasta=$FASTA_OTUS.fasta);
+	NSLOTS2=$(( $NCPUS / 2 ))
+	mothur "#chimera.uchime(fasta=$FASTA_OTUS.fasta, name=$NAMES_OTUS.names, group=$GROUP.groups, reference=self, processors=$NSLOTS2);
 	get.current()"
-	unset LOG
+	unset LOG ACCNOS
 	LOG=`ls mothur.*.logfile | tail -1`
-	NAMES_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
-	FASTA_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
+	ACCNOS=`sed -n '/get.current()/,$p' $LOG | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
 
-	mothur "#list.seqs(name=$NAMES_OTUS.names);
-	get.seqs(accnos=current, fasta=$FASTA.fasta, list=$LIST.list, name=$NAMES.names, group=$GROUP.groups);
-	get.current()"
-	unset LOG FASTA NAMES GROUP LIST
-	LOG=`ls mothur.*.logfile | tail -1`
-	FASTA=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
-	NAMES=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
-	GROUP=`sed -n '/get.current()/,$p' $LOG | grep "^group=" | sed 's/group=//;s/\.groups//'`
-	LIST=`sed -n '/get.current()/,$p' $LOG | grep "^list=" | sed 's/list=//;s/\.list//'`
+	if [ -s $ACCNOS.accnos ]
+	then
+		mothur "#remove.seqs(accnos=$ACCNOS.accnos, name=$NAMES_OTUS.names, fasta=$FASTA_OTUS.fasta);
+		get.current()"
+		unset LOG
+		LOG=`ls mothur.*.logfile | tail -1`
+		NAMES_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
+		FASTA_OTUS=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
+
+		mothur "#list.seqs(name=$NAMES_OTUS.names);
+		get.seqs(accnos=current, fasta=$FASTA.fasta, list=$LIST.list, name=$NAMES.names, group=$GROUP.groups);
+		get.current()"
+		unset LOG FASTA NAMES GROUP LIST
+		LOG=`ls mothur.*.logfile | tail -1`
+		FASTA=`sed -n '/get.current()/,$p' $LOG | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
+		NAMES=`sed -n '/get.current()/,$p' $LOG | grep "^name=" | sed 's/name=//;s/\.names//'`
+		GROUP=`sed -n '/get.current()/,$p' $LOG | grep "^group=" | sed 's/group=//;s/\.groups//'`
+		LIST=`sed -n '/get.current()/,$p' $LOG | grep "^list=" | sed 's/list=//;s/\.list//'`
+	fi
 fi
 
-
 # Save newly set and updated variables
 cd ..
 comm -23 <(set -o posix; set | sort) <(sort config/OTU.set) | tr "=" "\t" | grep -v "^[a-z]" | grep -P -v "^PWD\t" | sed "s/'//g" > config/OTU_env.txt

diff --git a/src/deltamp.main b/src/deltamp.main
@@ -238,6 +238,56 @@ else
 	exit 1
 fi
 
+
+# Databases
+if [ $TARG == "ITS" ]
+then
+	if [ $CUT_DB == "yes" ]
+	then
+		DBCUT=$DB.$ITSX
+	fi
+	if [ $ASSIGN_FUNCT == "yes" ]
+	then
+		CITATION[FUN]="Nguyen et al., 2016"
+		FULLCITATION[FUN]="Nguyen, N. H., Song, Z., Bates, S. T., Branco, S., Tedersoo, L., Menke, J., … Kennedy, P. G. (2016). FUNGuild: An open annotation tool for parsing fungal community datasets by ecological guild. Fungal Ecology, 20, 241–248."
+		VERSION[FUN]=1.1
+	fi
+fi
+
+if ( [ $TARG == "18S" ] || [ $TARG == "16S" ] ) && [ $CUT_DB == "yes" ]
+then
+	if [ $TECH == "454" ] && [ $FLIP == "true" ]
+	then
+		DBCUT=$RVS_NAME.$FWD_NAME.$DB
+	else
+		DBCUT=$FWD_NAME.$RVS_NAME.$DB
+	fi
+fi
+
+DBALIGN=$DB.align
+if [ -f $DBFOLD/$DBALIGN.fasta ] && [ $CUT_DB == "yes" ]
+then
+	DBCHOP=$DBCUT.align
+fi
+
+# Pre-clustering
+if [ $CLUST == "swarm" ] && [ $PRECLUST != "no" ]
+then
+	echo "# Configuration file error: pre-clustering is not allowed for swarm based OTU clustering. Turn off the 'Pre-clustering' argument with 'no' or use another clustering algorithm." >&2
+	show_doc | fmt -s -w $(tput cols) >&2
+	exit 1
+fi
+
+if [ $PRECLUST == "mothur" ]
+then
+	if [ ! -f $DBFOLD/$DBALIGN.fasta ]
+	then
+		echo "# Configuration file error: the mothur pre-clustering method could only be used if an aligned version of the reference database is available." >&2
+		show_doc | fmt -s -w $(tput cols) >&2
+		exit 1
+	fi
+fi
+
 # Dependencies
 # modules
 # gnu parallel
@@ -294,6 +344,9 @@ then
 
 elif [ $TECH == "Illumina" ]
 then
+	# MiSeq SOP
+	CITATION[SOP]="Kozich et al., 2013"
+	FULLCITATION[SOP]="Kozich, J. J., Westcott, S. L., Baxter, N. T., Highlander, S. K., & Schloss, P. D. (2013). Development of a dual-index sequencing strategy and curation pipeline for analyzing amplicon sequence data on the MiSeq Illumina sequencing platform. Applied and Environmental Microbiology, AEM.01043-13. doi:10.1128/AEM.01043-13"
 	# PANDAseq
 	CITATION[PANDA]="Masella et al., 2012"
 	FULLCITATION[PANDA]="Masella AP, Bartram AK, Truszkowski JM, Brown DG, Neufeld JD (2012) PANDAseq: paired-end assembler for illumina sequences. BMC Bioinformatics, 13, 31."
@@ -400,34 +453,6 @@ then
 	paste <(cut -f 1 config/lib3.list) <(cut -f 1 config/lib2.list) | sort -k 1,1 | awk '{if(NR==1){S=$1;printf "%s %s",S,$2} else {if($1==S){printf " %s",$2} else {S=$1;printf "\n%s %s",S,$2}}}END{printf "\n"}' > config/lib4.list
 fi
 SAMP_SIZE=`sed -n '$=' config/lib4.list`
-
-# Databases
-if [ $TARG == "ITS" ]
-then
-	if [ $CUT_DB == "yes" ]
-	then
-		DBCUT=$DB.$ITSX
-	fi
-	CITATION[FUN]="Nguyen et al., 2016"
-	FULLCITATION[FUN]="Nguyen, N. H., Song, Z., Bates, S. T., Branco, S., Tedersoo, L., Menke, J., … Kennedy, P. G. (2016). FUNGuild: An open annotation tool for parsing fungal community datasets by ecological guild. Fungal Ecology, 20, 241–248."
-	VERSION[FUN]=1.1
-fi
-
-if ( [ $TARG == "18S" ] || [ $TARG == "16S" ] ) && [ $CUT_DB == "yes" ]
-then
-	if [ $TECH == "454" ] && [ $FLIP == "true" ]
-	then
-		DBCUT=$RVS_NAME.$FWD_NAME.$DB
-	else
-		DBCUT=$FWD_NAME.$RVS_NAME.$DB
-	fi
-fi
-
-DBALIGN=$DB.align
-if [ -f $DBFOLD/$DBALIGN.fasta ] && [ $CUT_DB == "yes" ]
-then
-	DBCHOP=$DBCUT.align
-fi
 
 # Save newly set variables to file (avoiding lowercase variables from loops):
 join -v 1 <(set -o posix; set | tr "=" "\t" | sort -k 1,1 ) <(cat /tmp/amp.$STIME.set | tr "=" "\t" | sort -k 1,1 ) | grep -v "^[a-z]\|BASH_REMATCH" | sed "s/'//g" > config/env.txt

diff --git a/src/trim.step b/src/trim.step
@@ -9,6 +9,7 @@ module load DeltaMP/${VERSION[DELTAMP]}
 LIB=(`sed -n "$ARRAY_TASK"'p' config/lib4.list`)
 SAMP=${LIB[0]}
 LIB_NAME=(`echo ${LIB[@]} | cut -d " " -f 2-`)
+LOG=mothur.${TECH}_trim.$SAMP.logfile
 cd processing && mkdir $SAMP
 cd $SAMP
 
@@ -79,19 +80,19 @@ do
 		else
 			if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
 			then
-				mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
+				mothur "#set.logfile(name=$LOG, append=T);
 				set.dir(input=../../libraries/fasta, output=./);
 				trim.seqs(fasta=$i.fasta, oligos=oligos.$i, qfile=$i.qual, flip=$FLIP, qaverage=$QUAL, bdiffs=$BDIFFS, pdiffs=$PDIFFS, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$LENGTH)"
 			else
-				mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
+				mothur "#set.logfile(name=$LOG, append=T);
 				set.dir(input=../../libraries/fasta, output=./);
 				trim.seqs(fasta=$i.fasta, oligos=oligos.$i, qfile=$i.qual, flip=$FLIP, qaverage=$QUAL, bdiffs=$BDIFFS, pdiffs=$PDIFFS, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$LENGTH, keepfirst=$LENGTH)"
 			fi
 		fi
 	elif [ $TECH == "Illumina" ]
 	then
 		QUAL=`cat $EXEC/quality_check/optimized.quality.txt`
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile);
+		mothur "#set.logfile(name=$LOG, append=T);
 		set.dir(input=../../libraries/fasta, output=./);
 		trim.seqs(fasta=$i.pairend.fasta, qfile=$i.pairend.qual, qaverage=$QUAL, maxambig=$MAXAMBIG, maxhomop=$MAXHOMOP, minlength=$MINLEN, maxlength=$MAXLEN)"
 	fi
@@ -106,85 +107,88 @@ then
 	SIZE=$(grep "Minimum" ../../quality_check/$SUBPROJECT.summary.stat.tsv | cut -f 3 | cut -d "." -f 1)
 	if [ $TECH == "454" ] && [ "$DENOISE" == "yes" ]
 	then
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+		mothur "#set.logfile(name=$LOG, append=T);
 		unique.seqs(fasta=$SAMP.trim.fasta, name=$SAMP.names);
 		sub.sample(fasta=current, name=current, size=$SIZE);
 		get.current()"
 	else
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+		mothur "#set.logfile(name=$LOG, append=T);
 		unique.seqs(fasta=$SAMP.trim.fasta);
 		sub.sample(fasta=current, name=current, size=$SIZE);
 		get.current()"
 	fi
 else
 	if [ $TECH == "454" ] && [ "$DENOISE" == "yes" ]
 	then
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+		mothur "#set.logfile(name=$LOG, append=T);
 		unique.seqs(fasta=$SAMP.trim.fasta, name=$SAMP.names);
 		get.current()"
 	else
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+		mothur "#set.logfile(name=$LOG, append=T);
 		unique.seqs(fasta=$SAMP.trim.fasta);
 		get.current()"
 	fi
 fi
 
 #store current files in variables
-unset LOG FASTA NAMES
-LOG=mothur.${TECH}_trim.$SAMP.logfile
+unset FASTA NAMES
 FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
 NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
 
-# align to reference (silva DB only), pre-cluster (silva DB and/or Illumina), and chimera removal
-if [ ! -z $DBCHOP ] && [ $TECH == "454" ]
+# Precluster
+if [ $PRECLUST == "mothur" ]
 then
-	PRECDIFF=$(( ${LENGTH} / 100 ))
-	mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
-	set.dir(tempdefault=/data/ecogen/databases/mothur);
-	align.seqs(candidate=$FASTA.fasta, template=$DBCHOP.fasta, align=needleman);
-	screen.seqs(fasta=current, name=$NAMES.names, optimize=start-end, criteria=95);
-	filter.seqs(fasta=current, vertical=T, trump=.);
-	pre.cluster(fasta=current, name=current, diffs=$PRECDIFF);
-	chimera.uchime(fasta=current, name=current, reference=self);
-	get.current()"
-	unset FASTA NAMES ACCNOS
-	FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
-	NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
-	ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
-	if [ -s $ACCNOS.accnos ]
+	# pre-cluster at max 1% after aligning to reference and removing the 5 % worse aligned reads
+	if [ $TECH == "454" ] && [ $ITSX == "no" ]
 	then
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
-		remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
+		PRECDIFF=$(( ${LENGTH} / 100 ))
+	else
+		PRECDIFF=$(twofasta $FASTA.fasta | awk -v M=$MAXLEN 'BEGIN{min=M}length($1)<M{min=length($1)}END{print int(min/100)'awk '$1!~"^>"{sum+=length($1)}END{print int(sum/(FNR/2)/100)}')
+	fi
+	if [ $PRECDIFF -ge 1 ]
+	then
+		mothur "#set.logfile(name=$LOG, append=T);
+		set.dir(tempdefault=/data/ecogen/databases/mothur);
+		align.seqs(candidate=$FASTA.fasta, template=$DBCHOP.fasta, align=needleman);
+		screen.seqs(fasta=current, name=$NAMES.names, optimize=start-end, criteria=95);
 		filter.seqs(fasta=current, vertical=T, trump=.);
+		pre.cluster(fasta=current, name=current, diffs=$PRECDIFF);
 		get.current()"
+		unset FASTA NAMES
+		FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
+		NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
+	else
+		echo "The pre-culstering step was skipped because the smallest sequence have less than 100nt."
 	fi
-elif [ $TECH == "Illumina" ]
+elif [ $PRECLUST == "cdhit454" ]
 then
-	# pre-cluster with cd-hit-454, allowing 1% dissimilarity with indel of size 1 at max 
+	# pre-cluster with cd-hit-454, allowing 1% dissimilarity with indel of max 1 nt long
 	cdhit4542mothur () {
 		bak=$1 ; fasta=$2 ; names=$3
 		nl $bak | sed 's/>//;s/\.\.\.//' | awk '{print $4,$1,$2}' | sort -k 1,1 | join - <(sort -k 1,1 $names) | awk '{size=split($4,a,",");print $0,size}' | sort -k 3,3n -k 5,5nr | awk '{if(NR==1){nb=$3;printf "%s\t%s\t%s", $1,$2,$4} else {if($3==nb){printf ",%s", $4} else {nb=$3;printf "\n%s\t%s\t%s",$1,$2,$4}}}END{printf "\n"}' | sort -k 2,2n | cut -f 1,3 > ${names%.*}.precl.names
-		obigrep --uppercase --id-list=<(cut -f 1 ${names%.*}.precl.names) $fasta| twofasta > ${fasta%.*}.precl.fasta
-		echo "cdhit4542mothur output files:#${fasta%.*}.precl.fasta#${names%.*}.precl.names" | tr "#" "\n"
+		obigrep --uppercase --id-list=<(cut -f 1 ${names%.*}.precl.names) $fasta | twofasta > ${fasta%.*}.precl.fasta
 	}
 	cd-hit-454 -B 1 -g 1 -M 6000 -c 0.99 -bak 1 -i $FASTA.fasta -o $FASTA.precl.fasta.temp
 	cdhit4542mothur $FASTA.precl.fasta.temp.bak.clstr $FASTA.fasta $NAMES.names
 	rm $FASTA.precl.fasta.temp*
-	mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
-	chimera.uchime(fasta=$FASTA.precl.fasta, name=$NAMES.precl.names, reference=self);
+	FASTA=$FASTA.precl
+	NAMES=$NAMES.precl
+fi
+
+# [ITSx and] Chimera removal
+if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
+then
+	ITSx -i $FASTA.fasta --save_regions $ITSX --preserve T --graphical F -o $FASTA.itsx
+	awk '$0~"^>"{sub(">","");print $1}' $FASTA.itsx.$ITSX.fasta | sort | join - <(sort -k 1,1 $NAMES.names) > $NAMES.itsx.$ITSX.names
+	mothur "#set.logfile(name=$LOG, append=T);
+	unique.seqs(fasta=$FASTA.itsx.$ITSX.fasta, name=$NAMES.itsx.$ITSX.names);
 	get.current()"
-	unset FASTA NAMES ACCNOS
+	unset FASTA NAMES
 	FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
 	NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
-	ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
-	if [ -s $ACCNOS.accnos ]
-	then
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
-		remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
-		get.current()"
-	fi
-else
-	mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+elif [ $CHIMERA == "before" ] || [ $CHIMERA == "both" ]
+then
+	mothur "#set.logfile(name=$LOG, append=T);
 	chimera.uchime(fasta=$FASTA.fasta, name=$NAMES.names, reference=self);
 	get.current()"
 	unset FASTA NAMES ACCNOS
@@ -193,25 +197,12 @@ else
 	ACCNOS=`tac $LOG | sed -n '1,/get.current()/p' | grep "^accnos=" | sed 's/accnos=//;s/\.accnos//'`
 	if [ -s $ACCNOS.accnos ]
 	then
-		mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
+		mothur "#set.logfile(name=$LOG, append=T);
 		remove.seqs(accnos=$ACCNOS.accnos, fasta=$FASTA.fasta, name=$NAMES.names);
 		get.current()"
 	fi
 fi
 
-# ITSx
-if [ $TARG == "ITS" ] && [ $ITSX != "no" ]
-then
-	unset FASTA NAMES
-	FASTA=`tac $LOG | sed -n '1,/get.current()/p' | grep "^fasta=" | sed 's/fasta=//;s/\.fasta//'`
-	NAMES=`tac $LOG | sed -n '1,/get.current()/p' | grep "^name=" | sed 's/name=//;s/\.names//'`
-	ITSx -i $FASTA.fasta --save_regions $ITSX --preserve T --graphical F -o $FASTA.itsx
-	awk '$0~"^>"{sub(">","");print $1}' $FASTA.itsx.$ITSX.fasta | sort | join - <(sort -k 1,1 $NAMES.names) > $NAMES.itsx.$ITSX.names
-	mothur "#set.logfile(name=mothur.${TECH}_trim.$SAMP.logfile, append=T);
-	unique.seqs(fasta=$FASTA.itsx.$ITSX.fasta, name=$NAMES.itsx.$ITSX.names);
-	get.current()"
-fi
-
 # list files and directories
 . $BIN/list_step_files.sh