In [None]:
1.	Downloading from SRA : 
# Download one run archive from the NCBI SRA FTP server (SRR1234567 is a placeholder accession).
# The hyphens in the host name (ftp-trace) and in the sra-instant directory are part of the address.
wget ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR123/SRR1234567/SRR1234567.sra


In [None]:
2.	Convert SRA files to FastQ: Fastqdump.sh $1

    
#!/bin/sh
#SBATCH --time=1-10:15 -n1 -p dque
# Usage: Fastqdump.sh <run.sra>
# Converts one SRA archive to FASTQ; --split-files writes each read of a spot to its own file.
# "$1" is quoted so that a path containing spaces or glob characters is passed as one argument.
fastq-dump --split-files "$1"

    
    

In [None]:
3.	FASTQC: fastqc.sh $1
    
#!/bin/bash

#SBATCH --time=24:00:00 -n12 -p dque

# Usage: fastqc.sh <reads.fastq>
# Runs the FastQC quality-control report on the given file.
# NOTE(review): the job requests 12 tasks (-n12) but FastQC is started with 24 threads (-t 24);
# confirm which number is intended for this partition.

# Quoted so that a path containing spaces is passed as a single argument.
fastqc -t 24 "$1"


In [None]:
4.	Split into chunks of 10 million reads: fastq_split.sh $1 ($1 = gzipped .fq.gz file)
    
#!/bin/bash

#SBATCH --time=2:00:00 -n6 -p dque

# Usage: fastq_split.sh <reads.fq.gz>
# Splits a gzipped FASTQ file into gzip-compressed chunks written to ./split.

IN_FILE=$1
echo "$IN_FILE"

mkdir -p split

# Chunk prefix = first two dot-separated fields of the file NAME (sample.fq.gz -> sample.fq).
# The directory part is stripped first: cutting the full path breaks for inputs such as
# ./sample.fq.gz or data/sample.fq.gz and would yield chunk names that cannot be created in split/.
e=$(basename "$IN_FILE" | cut -f1,2 -d".")
echo "$e"

# 40,000,000 lines per chunk = 10,000,000 reads at 4 lines per FASTQ record.
# split exposes the name of the current chunk to the --filter command as $FILE (escaped here so
# that split, not this script, expands it); every chunk is compressed by pigz with 2 threads.
zcat "$IN_FILE" | split --verbose -l 40000000 -d -a 4 --filter="pigz -p 2 -c > split/\$FILE.gz" - "split.10m.$e"

    

In [None]:
5.	Adapter Trimming: trimgalore.sh $1 $2 (val_1 and val_2 files are generated after trimming)


#!/bin/bash

#SBATCH --time=24:00:00 -n6 -p dque

# Usage: trimgalore.sh <Read1_forward.fq> <Read1_reverse.fq>
# Adapter- and quality-trims a read pair with Trim Galore and writes the results to a
# trimmed/ directory created next to the first input file.

DIR_NAME=$(dirname "$1")
OUTPUT_DIR="$DIR_NAME"/trimmed

# -p: do not report an error when the directory is left over from an earlier run
mkdir -p "$OUTPUT_DIR"

# --paired: treat the two files as mates; -q 20: quality cutoff for end trimming;
# --length 50: minimum read length kept after trimming
trim_galore --paired -q 20 --length 50 -o "$OUTPUT_DIR" "$1" "$2"



In [None]:
6.	Aligning: bismark_genocode_align_nady.sh $1 $2 (single .bam file generated for paired-end reads)
    
#!/bin/bash

#SBATCH --time=2-20:00:00 -n12 -p dque

# Usage: bismark_genocode_align_nady.sh <First_read.fq> <Second_read.fq>
# Aligns a read pair with Bismark against the genome in /home/nady/genomes/gencode, writes the
# output to an aligned/ directory next to the first read file, and prints start time, end time
# and elapsed time. Only the two read files are used; no further arguments are read.

starttime=$(date +"%s")
echo $(date -u -d "@${starttime}")

DIR_NAME=$(dirname "$1")
OUT_DIR="$DIR_NAME"/aligned

# -p: do not report an error when the directory is left over from an earlier run
mkdir -p "$OUT_DIR"

echo "Output directory: $OUT_DIR"

bismark --multicore 6 -q -o "$OUT_DIR" --genome /home/nady/genomes/gencode -1 "$1" -2 "$2"

endtime=$(date +"%s")
echo $(date -u -d "@${endtime}")

# Format the duration arithmetically: rendering it as a clock time with `date +%T` wraps around
# after 24 hours, and this job is allowed to run for up to 2 days 20 hours.
elapsed=$((endtime - starttime))
printf 'Time elapsed %02d:%02d:%02d\n' $((elapsed / 3600)) $((elapsed % 3600 / 60)) $((elapsed % 60))
    

In [None]:
7.	Split by chromosome: split.sh $1
    
#!/bin/sh
#SBATCH --time=10:00:00 -n1 -p dque

# Usage: split.sh <aligned.bam>
# Splits the BAM file into one output file per reference sequence (-reference).
# "$1" is quoted so that a path containing spaces is passed as a single argument.
bamtools split -in "$1" -reference


In [None]:
8.	Merge by chromosome: bamtools_merge.sh $1 ($1 = list.txt, a text file listing the BAM files to merge)
    
#!/bin/sh
#SBATCH --time=1-10:15 -n1 -p dque

# Usage: bamtools_merge.sh <list.txt>
# Merges the BAM files named in the list file into gathered/<list name>.gathered.bam,
# created next to the list file.

# Name and directory of the list file
FILE=$(basename "$1")
INPUT_DIR=$(dirname "$1")

# Create the gathered/ directory next to the list file if it doesn't exist
mkdir -p "$INPUT_DIR"/gathered

# Path of the merged output file
OUTPUT_FILE="$INPUT_DIR"/gathered/"$FILE".gathered.bam

echo "$FILE"
echo "$OUTPUT_FILE"

# Alternative kept for reference (Picard instead of bamtools):
#java -jar /home/scott/software/picard.jar GatherBamFiles I=$1 O=$OUTPUT_FILE
bamtools merge -list "$1" -out "$OUTPUT_FILE"


In [None]:
9.	M-Bias: mbias_report.sh $1 (use python script for drawing M-Bias plots)
    
#!/bin/bash

#SBATCH --time=2-0 -n24 -p bynode

# Usage: mbias_report.sh <aligned.bam>
# Runs the Bismark methylation extractor in M-bias-only mode on a paired-end (-p) alignment
# and writes the output to methylation_extraction/ next to the input file.

INPUT_DIR=$(dirname "$1")
OUTPUT_DIR="$INPUT_DIR"/methylation_extraction

mkdir -p "$OUTPUT_DIR"

# Paths are quoted so that names containing spaces are passed as single arguments.
bismark_methylation_extractor -p --comprehensive --report --multicore 8 -o "$OUTPUT_DIR" "$1" --mbias_only


In [None]:
10.	Coordinate sorting: picard_sort_nady.sh $1
    
#!/bin/bash

#SBATCH --time=24:00:00 -n8 -p dque

# Usage: picard_sort_nady.sh <input.bam>
# Coordinate-sorts the input with Picard SortSam and writes the result to
# sorted/<input name>.sorted.bam next to the input file.

# Name and directory of the input file
FILE=$(basename "$1")
INPUT_DIR=$(dirname "$1")

# Create the sorted/ directory next to the input if it doesn't exist
mkdir -p "$INPUT_DIR"/sorted

# Path of the output file
OUTPUT_FILE="$INPUT_DIR"/sorted/"$FILE".sorted.bam

echo "$FILE"
echo "$OUTPUT_FILE"

# I= and O= values are quoted so that paths containing spaces stay single arguments.
/home/nady/bin/jre1.8.0_151/bin/java -jar /home/nady/picard/build/libs/picard.jar SortSam I="$1" O="$OUTPUT_FILE" SORT_ORDER=coordinate


In [None]:
11.	Deduplication: deduplicate.sh $1
    
#!/bin/bash

#SBATCH --time=24:00:00 -n8 -p dque

# Usage: deduplicate.sh <input.bam>
# Runs Picard MarkDuplicates with REMOVE_DUPLICATES=true and writes the resulting BAM and
# the metrics file to deduplicated/ next to the input file.

# Name and directory of the input file
FILE=$(basename "$1")
INPUT_DIR=$(dirname "$1")

# Create the deduplicated/ directory next to the input if it doesn't exist
mkdir -p "$INPUT_DIR"/deduplicated

# Paths of the output BAM and of the duplication metrics report
OUTPUT_FILE="$INPUT_DIR"/deduplicated/"$FILE".deduplicated.bam
METRICS="$INPUT_DIR"/deduplicated/"$FILE".metrics.txt

echo "$FILE"
echo "$OUTPUT_FILE"
echo "$METRICS"

# I=, O= and M= values are quoted so that paths containing spaces stay single arguments.
/home/nady/bin/jre1.8.0_151/bin/java -jar /home/nady/picard/build/libs/picard.jar MarkDuplicates REMOVE_DUPLICATES=true I="$1" O="$OUTPUT_FILE" M="$METRICS"


In [None]:
12.	Query sort: sorting_n_picard_nady.sh $1
    
#!/bin/bash

#SBATCH --time=24:00:00 -n8 -p dque

# Usage: sorting_n_picard_nady.sh <input.bam>
# Sorts the input by read name (SORT_ORDER=queryname) with Picard SortSam and writes the
# result to sorted/<input name>.sorted.bam next to the input file.

# Name and directory of the input file
FILE=$(basename "$1")
INPUT_DIR=$(dirname "$1")

# Create the sorted/ directory next to the input if it doesn't exist
mkdir -p "$INPUT_DIR"/sorted

# Path of the output file
OUTPUT_FILE="$INPUT_DIR"/sorted/"$FILE".sorted.bam

echo "$FILE"
echo "$OUTPUT_FILE"

# I= and O= values are quoted so that paths containing spaces stay single arguments.
/home/nady/bin/jre1.8.0_151/bin/java -jar /home/nady/picard/build/libs/picard.jar SortSam I="$1" O="$OUTPUT_FILE" SORT_ORDER=queryname

In [None]:
13.	Methylation Extraction: bismark_methly_extract.sh $1 0 0 3 1 (number of bases to ignore, in order: R1 5' end, R1 3' end, R2 5' end, R2 3' end; choose the values from the M-Bias plot)

#!/bin/bash

#SBATCH --time=2-0 -n24 -p bynode

# Usage: bismark_methly_extract.sh <File.bam> <ignore> <ignore_3prime> <r2_ignore> <r2_ignore_3prime>
# Runs the Bismark methylation extractor on a paired-end (-p) alignment and writes the output
# to methylation_extraction/ next to the input file.
#   $2 -> --ignore            $3 -> --ignore_3prime
#   $4 -> --ignore_r2         $5 -> --ignore_3prime_r2

INPUT_DIR=$(dirname "$1")
OUTPUT_DIR="$INPUT_DIR"/methylation_extraction

mkdir -p "$OUTPUT_DIR"

# NOTE(review): the recorded command line was cut off by the editor after
# "--genome_folder /home/nad". The genome folder below is the Bismark genome used by the other
# steps of this pipeline; confirm it, and re-add any options that followed it in the original script.
bismark_methylation_extractor -p --comprehensive --report --multicore 8 -o "$OUTPUT_DIR" "$1" \
    --ignore "$2" --ignore_r2 "$4" --ignore_3prime "$3" --ignore_3prime_r2 "$5" \
    --bedGraph --buffer_size 8G --cytosine_report --gzip \
    --genome_folder /home/nady/genomes/gencode


    


In [None]:
14.	Coverage to cytosine: recal_methex.sh $1 ($1 = .cov.gz file)
    
#!/bin/bash

#SBATCH --time=3:00:00 -n4 -p dque

# Usage: recal_methex.sh <sample.cov.gz>
# Runs Bismark coverage2cytosine with --merge_CpG on a coverage file; the output name is
# cytocine_cov/<input name>.cyt next to the input file.

INPUT_DIR=$(dirname "$1")
FILE=$(basename "$1")

# Output location is defined once and reused for both the directory and the file name
OUTPUT_DIR="$INPUT_DIR"/cytocine_cov
mkdir -p "$OUTPUT_DIR"
OUTPUT_FILE="$OUTPUT_DIR"/"$FILE".cyt

# Paths are quoted so that names containing spaces are passed as single arguments.
/home/nady/bismarkchathura/bismark_v0.18.1/coverage2cytosine --merge_CpG --genome_folder /home/nady/genomes/gencode -o "$OUTPUT_FILE" "$1"



In [None]:
15.	Generate Bins: Use Chathura’s binning script in Python 3
    
    
Command: python3 bin_methylation_py3_vers2.py $1-cyt.merged_CpG_evidence.cov 100 10 4
Arguments after the input file: 1 - Bin_size, 2 - Minimum_Coverage, 3 - Number_of_CpGs_covered



A. Prerequisite: Bisulfite Genome Preparation (download the genome to the folder ~/genomes)

#!/bin/sh
#SBATCH --time=1-10:15 -n1 -p dque
# Prepares the genome stored under ~/genomes/ for Bismark, with verbose output.
GENOME_DIR=~/genomes/
PREP_TOOL=/home/nady/software/Bismark_v0.19.0/bismark_genome_preparation
"$PREP_TOOL" --verbose "$GENOME_DIR"


In [None]:
B. BashRC

# .bashrc

# Pull in the system-wide bash settings when the file is present
if test -f /etc/bashrc; then
        source /etc/bashrc
fi

# Uncomment the following line if you don't like systemctl's auto-paging feature:
# export SYSTEMD_PAGER=

# User specific aliases and functions

# JAVA_HOME names the JRE installation root, not its bin/ directory
# (the java executable is then $JAVA_HOME/bin/java).
export JAVA_HOME=/home/nady/bin/jre1.8.0_151
#export PATH=$JAVA_HOME/bin:$PATH
#export PATH=$PATH:/home/scott/software/samtools/bin
export PATH=$PATH:/home/nady/bin/samtools/bin
#export PATH=$PATH:~/software/bismark_v0.18.1
#export PATH=$PATH:/home/scott/software/FastQC
#export PATH=$PATH:/home/scott/software/TrimGalore-0.4.3
#export PATH=$PATH:/home/scott/software/bowtie2-2.3.2-legacy
export PATH=$PATH:/home/nady/bin
export PATH=$PATH:/home/gunasekara/softwares/biscuit-release
#export PATH=$PATH:/home/noahk/bin/
export PATH=$PATH:/home/nady/sratoolkit.2.8.2-1-centos_linux64/bin/
export PATH=$PATH:/home/nady/pigz
# PATH entries are searched as directories, so listing the jar file there has no effect.
# The jar location is exported in its own variable instead: java -jar "$PICARD_JAR" <tool> ...
export PICARD_JAR=/home/nady/picard/build/libs/picard.jar
export PATH=$PATH:/home/nady/software/Bismark_v0.19.0
export PATH=$PATH:/home/nady/software/bowtie2-2.3.3.1-linux-x86_64
export PATH=$PATH:/home/jdong/miniconda2/bin
#export PATH=$PATH:/home/gunasekara/modules/bismark_v0.18.1
export PATH=$PATH:/home/noahk/software/cutadapt-1.8.3/bin
export PATH=$PATH:/home/jdong/sratoolkit.2.8.1-centos_linux64/bin
export PATH=$PATH:/home/noahk/jduryea/samstat-1.5.1/bin
#export PATH=$PATH:/home/coarfa/pkgs/java/bin
export PATH=$PATH:/home/noahk/software/picard/bin
#export PATH=$PATH:/home/coarfa/pkgs/java/picard/picard.jar
#export PATH=$PATH:/home/coarfa/pkgs/picardtools-2.9.0/picard.jar
#export PATH=$PATH:/home/coarfa/pkgs/picard/picard.jar
#export JAVA_HOME=/home/coarfa/pkgs/java
#export PATH=$JAVA_HOME/bin:$PATH
#export PATH=$PATH:/home/coarfa/pkgs/picardtools-2.9.0
#export PATH=$PATH:/home/coarfa/pkgs/java/picard
#export PATH=$PATH:/home/jduryea/trim_galore_v0.4.4
export PATH=$PATH:/home/jduryea
export PATH=$PATH:/home/noahk/software/FastQC/bin
#export PATH=$PATH:/home/noahk/software/trim_galore_v0.4.4
export PATH=$PATH:/home/jduryea/bin/bamtools/bin
export PATH=$PATH:/home/jduryea/bin/biscuit-release
export PATH=$PATH:/home/gunasekara/vcftools/bin/bin
export PATH=$PATH:/home/gunasekara/MethylExtract_1.9.1
export PATH=$PATH:/home/gunasekara/bin/pigz-2.3.4
#export PATH=$PATH:/home/scott/python/bin/
export PATH=$PATH:~/.local/bin
export PATH=$PATH:/home/gunasekara/bin/bbmap

# Perl modules installed under /home/gunasekara/perl5: put its bin/ first on PATH and point
# the Perl library and installer variables at it. Existing values, when set, are kept after
# the new entry.
export PATH="/home/gunasekara/perl5/bin${PATH:+:${PATH}}"
export PERL5LIB="/home/gunasekara/perl5/lib/perl5${PERL5LIB:+:${PERL5LIB}}"
export PERL_LOCAL_LIB_ROOT="/home/gunasekara/perl5${PERL_LOCAL_LIB_ROOT:+:${PERL_LOCAL_LIB_ROOT}}"
export PERL_MB_OPT="--install_base \"/home/gunasekara/perl5\""
export PERL_MM_OPT="INSTALL_BASE=/home/gunasekara/perl5"

# added by Anaconda2 installer
PATH="/home/nady/anaconda2/bin:$PATH"; export PATH

# added by Anaconda3 installer
