Skip to content

Commit

Permalink
Implements #39, which has the side effect of disabling weblogo comput…
Browse files Browse the repository at this point in the history
…ation, as no info-file can be output by cutadapt when using the linked adapter strategy
  • Loading branch information
lentendu committed Sep 14, 2018
1 parent 40d4adc commit f915196
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 13 deletions.
2 changes: 1 addition & 1 deletion bin/make_doc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ then
FWDSIM=`awk -v F=${#FWD} -v D=$PDIFFS 'BEGIN{printf "%.2g\n", 1-D/F}'`
RVSSIM=`awk -v R=${#RVS} -v D=$PDIFFS 'BEGIN{printf "%.2g\n", 1-D/R}'`
echo "Read pairs were extracted from raw libraries if at least one of the two reads hold the expected primer (forward primer for forward library, reverse primer for reverse library) at its 5' end, with a similarity threshold of $FWDSIM and $RVSSIM for the forward and reverse primer, respectively."
CUTAVG=`grep -m 1 "Total read pairs processed:" libraries/fastq/log.cutadapt.* | awk '{sub(",","",$NF);sum+=$NF}END{printf "%.0f\n", sum/NR}'`
CUTAVG=`grep -m 1 "Total read pairs processed:" libraries/fastq/log.cutadapt.* | awk '{gsub(",","",$NF);sum+=$NF}END{printf "%.0f\n", sum/NR}'`
echo "An average of $CUTAVG reads was extracted per pair of libraries."
echo ""
fi
Expand Down
18 changes: 11 additions & 7 deletions src/Illumina_fastq.step
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,20 @@ cd libraries
# Primer clipping
if [ $CLIPPING == "yes" ]
then
#FWDDIS=`awk -v F=${#FWD} -v D=$PDIFFS 'BEGIN{printf "%.2g\n", D/F}'`
RVSDIS=`awk -v R=${#RVS} -v D=$PDIFFS 'BEGIN{printf "%.2g\n", D/R}'`
FWD_RC=$(echo ">fwd#$FWD" | tr "#" "\n" | vsearch --quiet --fastx_revcomp - --fastaout - | grep -v ">")
RVS_RC=$(echo ">rvs#$RVS" | tr "#" "\n" | vsearch --quiet --fastx_revcomp - --fastaout - | grep -v ">")
DISS=`awk -v F=${#FWD} -v R=${#RVS} -v D=$PDIFFS 'BEGIN{DISS=D/F;if(D/R>DISS){DISS=D/R};printf "%.2g\n", DISS}'`
# cutadapt
cutadapt -g $FWD -G $RVS -e $DISS --trimmed-only --no-indels -o fastq/$LIB_NAME.fwd.fastq -p fastq/$LIB_NAME.rvs.fastq $FWD_LIB.fastq $RVS_LIB.fastq --info-file=raw_stat/$FWD_LIB.cutadapt > fastq/log.cutadapt.$LIB_NAME.txt
cutadapt -g $RVS -O ${#RVS} -e $RVSDIS --no-indels --trimmed-only -o fastq/$LIB_NAME.rvs.tmp.fastq --info-file=raw_stat/$RVS_LIB.cutadapt $RVS_LIB.fastq > fastq/log.cutadapt.aux.$LIB_NAME.txt
# in forward primer direction (and conversely for reverse library)
cutadapt -a $FWD...$RVS_RC -A $RVS...$FWD_RC -e $DISS --no-indels --untrimmed-output=fastq/$FWD_LIB.fwd_unt.fastq --untrimmed-paired-output=fastq/$RVS_LIB.fwd_unt.fastq -o fastq/$FWD_LIB.fwd.fastq -p fastq/$RVS_LIB.fwd.fastq $FWD_LIB.fastq $RVS_LIB.fastq > fastq/log.cutadapt.$LIB_NAME.fwd.txt
# in reverse primer direction and reverse complement
cutadapt -a $RVS...$FWD_RC -A $FWD...$RVS_RC -e $DISS --no-indels --trimmed-only -o fastq/$FWD_LIB.rvs.fastq -p fastq/$RVS_LIB.rvs.fastq $FWD_LIB.fwd_unt.fastq $RVS_LIB.fwd_unt.fastq > fastq/log.cutadapt.$LIB_NAME.rvs.txt
vsearch --quiet --fastx_revcomp fastq/$FWD_LIB.rvs.fastq --fastqout - | cat fastq/$FWD_LIB.fwd.fastq - > fastq/$LIB_NAME.fwd.fastq
vsearch --quiet --fastx_revcomp fastq/$RVS_LIB.rvs.fastq --fastqout - | cat fastq/$RVS_LIB.fwd.fastq - > fastq/$LIB_NAME.rvs.fastq
rm fastq/$FWD_LIB.rvs.fastq fastq/$FWD_LIB.fwd.fastq fastq/$FWD_LIB.fwd_unt.fastq fastq/$RVS_LIB.rvs.fastq fastq/$RVS_LIB.fwd.fastq fastq/$RVS_LIB.fwd_unt.fastq
# primer logo
weblogo -c classic -s large -t "$LIB_NAME: $FWD_NAME" < <(awk -v a=${#FWD} 'BEGIN{FS="\t"} NF==11{printf ">%s\n%*s\n",$1,a,$6}' raw_stat/$FWD_LIB.cutadapt | sed 's/ /N/g' )> raw_stat/weblogo.$LIB_NAME.forward.eps
weblogo -c classic -s large -t "$LIB_NAME: primer $RVS_NAME" < <(awk -v a=${#RVS} 'BEGIN{FS="\t"} NF==11{printf ">%s\n%*s\n",$1,a,$6}' raw_stat/$RVS_LIB.cutadapt | sed 's/ /N/g') > raw_stat/weblogo.$LIB_NAME.reverse.eps
rm fastq/$LIB_NAME.rvs.tmp.fastq
# weblogo -c classic -s large -t "$LIB_NAME: $FWD_NAME" < <(awk -v a=${#FWD} 'BEGIN{FS="\t"} NF==11{printf ">%s\n%*s\n",$1,a,$6}' raw_stat/$FWD_LIB.cutadapt | sed 's/ /N/g' )> raw_stat/weblogo.$LIB_NAME.forward.eps
# weblogo -c classic -s large -t "$LIB_NAME: primer $RVS_NAME" < <(awk -v a=${#RVS} 'BEGIN{FS="\t"} NF==11{printf ">%s\n%*s\n",$1,a,$6}' raw_stat/$RVS_LIB.cutadapt | sed 's/ /N/g') > raw_stat/weblogo.$LIB_NAME.reverse.eps
else
ln -s $PWD/$FWD_LIB.fastq $PWD/fastq/$LIB_NAME.fwd.fastq
ln -s $PWD/$RVS_LIB.fastq $PWD/fastq/$LIB_NAME.rvs.fastq
Expand Down
10 changes: 5 additions & 5 deletions src/Illumina_quality.step
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ dd if=$SUBPROJECT.summary.stat.tsv of=../archives/$SUBPROJECT.outputs/$SUBPROJEC
# Merge raw stat statistics and weblogos
cd ..
gs -q -sDEVICE=pdfwrite -o archives/$SUBPROJECT.outputs/$SUBPROJECT.raw_and_pair-end_reads_statistics.pdf libraries/raw_stat/$SUBPROJECT.raw_reads_with_primer_quality.pdf libraries/raw_stat/$SUBPROJECT.pair-end_reads_quality.pdf libraries/raw_stat/$SUBPROJECT.legend.pdf
if [ $CLIPPING == "yes" ]
then
gs -q -sDEVICE=pdfwrite -dEPSCrop -o archives/$SUBPROJECT.outputs/$SUBPROJECT.forward.weblogo.pdf libraries/raw_stat/weblogo.*.forward.eps
gs -q -sDEVICE=pdfwrite -dEPSCrop -o archives/$SUBPROJECT.outputs/$SUBPROJECT.reverse.weblogo.pdf libraries/raw_stat/weblogo.*.reverse.eps
fi
#if [ $CLIPPING == "yes" ]
#then
#gs -q -sDEVICE=pdfwrite -dEPSCrop -o archives/$SUBPROJECT.outputs/$SUBPROJECT.forward.weblogo.pdf libraries/raw_stat/weblogo.*.forward.eps
#gs -q -sDEVICE=pdfwrite -dEPSCrop -o archives/$SUBPROJECT.outputs/$SUBPROJECT.reverse.weblogo.pdf libraries/raw_stat/weblogo.*.reverse.eps
#fi

# list files and directories
. $BIN/list_step_files.sh
Expand Down

0 comments on commit f915196

Please sign in to comment.