Permalink
Browse files

Orchestra rework

  • Loading branch information...
Michael John Steinbaugh
Michael John Steinbaugh committed Mar 10, 2017
1 parent dd5dde3 commit 376ce2ec4cc6d3544531bca230bdd20632f9d46d
View
@@ -0,0 +1,2 @@
.DS_Store
._*
View
@@ -8,7 +8,7 @@ Connect to your high-performance computing (HPC) cluster and clone our git repos
git clone https://github.com/steinbaugh/seqcloud.git ~/seqcloud
```
Append this line to your `.bashrc` file:
Append this line to your `.bash_profile`:
```{bash}
source ~/seqcloud/seqcloud.shrc
View
@@ -0,0 +1,5 @@
# BioGrids (http://www.biogrids.org)
# Load the BioGrids shell setup, but only on machines that provide it
if [[ -f /programs/biogrids.shrc ]]; then
    . /programs/biogrids.shrc
fi

This file was deleted.

Oops, something went wrong.
View
@@ -0,0 +1,2 @@
# gzip FASTQ and SAM files found anywhere below the current directory.
# The two -name tests are grouped with \( ... \) so that `-type f` and the
# action apply to both patterns; without the grouping the action binds only to
# the "*.sam" test and "*.fastq" files are silently skipped.
# `-exec ... {} +` batches the file names safely (spaces, newlines) and runs
# nothing at all when there are no matches.
find . -type f \( -name "*.fastq" -o -name "*.sam" \) -exec gzip -fv -- {} +
View

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.
View
@@ -0,0 +1,2 @@
# This turns on debugging with more detailed output to the console:
# `set -x` makes the shell print each command as it is executed
set -x
View

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.

This file was deleted.

Oops, something went wrong.
@@ -1,15 +1,15 @@
# This requires fastq-dump from sra-tools
# https://github.com/ncbi/sra-tools
if [ ! -d fastq ]; then
mkdir fastq
fi
mkdir -p fastq
cd fastq
while read name; do
if [ ! -e "${name}.fastq.gz" ] && [ ! -e "${name}_1.fastq.gz" ]; then
echo "${name}"
bsub -q priority -W 6:00 fastq-dump --gzip --split-3 --accession "${name}"
# Submit one fastq-dump job per accession listed in ../SRR_Acc_List.txt.
# An accession is skipped when its gzipped FASTQ output already exists in the
# current directory (NAME.fastq.gz, or NAME_1.fastq.gz for split reads).
while read -r name
do
    # Ignore blank lines instead of submitting a job with an empty accession
    [ -n "$name" ] || continue
    # The braces matter: "$name_1" would expand a different variable called
    # `name_1` (normally unset), so the paired-end check would never match
    if [ ! -e "${name}.fastq.gz" ] && [ ! -e "${name}_1.fastq.gz" ]
    then
        echo "$name"
        bsub -q priority -W 6:00 fastq-dump --gzip --split-3 --accession "$name"
        # Remove SRA cache file upon completion
        # NOTE(review): this rm runs as soon as bsub returns; if bsub only
        # queues the job, the cache file is removed before fastq-dump has
        # finished with it -- confirm the intended ordering
        rm ~/ncbi/public/sra/"$name".sra
    fi
done < ../SRR_Acc_List.txt
cd ..
View

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.
View
@@ -1,3 +1,9 @@
# Install Miniconda 3 and register the channels used by Bioconda
# https://bioconda.github.io/
_installer="Miniconda3-latest-Linux-x86_64.sh"
wget "https://repo.continuum.io/miniconda/${_installer}"
bash "${_installer}"
rm "${_installer}"
# Channels are added one at a time, in this exact order
for _channel in conda-forge defaults r bioconda
do
    conda config --add channels "${_channel}"
done
unset _installer _channel
conda update conda
@@ -1,7 +1,5 @@
git clone https://github.com/Linuxbrew/brew.git ~/.linuxbrew
brew tap homebrew/science
brew install fastqc
brew install git
brew install kallisto
@@ -1,2 +1 @@
# -R "rusage[mem=16384]"
bsub -Is -q interactive bash
View
@@ -1,24 +1,28 @@
if [ ! -d kallisto ]; then
if [ ! -d kallisto ]
then
mkdir kallisto
fi
cd fastq
for file in `ls *.fastq.gz`; do
base=`basename $file .fastq.gz`
for file in $(ls *.fastq.gz)
do
base=$(basename "$file" .fastq.gz)
# Skip second paired file in loop for simplicity
if [[ ! $base == *"_2" ]]; then
if [[ $base == *"_1" ]]; then
base=`basename $base _1`
if [[ ! $base == *"_2" ]]
then
if [[ $base == *"_1" ]]
then
base=$(basename "$base" _1)
echo "$base (paired)"
file="${base}_1.fastq.gz ${base}_2.fastq.gz"
custom=""
custom=
else
echo "$base (single)"
custom="--single --fragment-length=200 -s 20"
fi
if [ ! -d ../kallisto/$base ]; then
mkdir ../kallisto/$base
kallisto quant --index=../../../genome/kallisto/transcripts.idx --output-dir=../kallisto/$base --bootstrap-samples=100 --threads=12 $custom $file
# Quantify a sample only once: an existing output directory marks it as done
if [ ! -d ../kallisto/"$base" ]
then
    mkdir ../kallisto/"$base"
    # $custom and $file are deliberately left unquoted: $custom holds zero or
    # more options and, for paired-end samples, $file holds the two FASTQ paths
    # separated by a space. Quoting $file would hand kallisto a single
    # non-existent "a_1.fastq.gz a_2.fastq.gz" argument.
    # shellcheck disable=SC2086
    kallisto quant --index=../../../genome/kallisto/transcripts.idx --output-dir=../kallisto/"$base" --bootstrap-samples=100 --threads=12 $custom $file
fi
fi
done
View
@@ -1,5 +1,9 @@
if [ -d kallisto ]; then
# Generate a genome index for kallisto
# $1: FASTA file handed to `kallisto index`
# (an assignment must not start with `$`: `$fasta="$1"` is executed as a
# command named `=<value>` and leaves `fasta` unset)
fasta="$1"
# Start from an empty `kallisto` directory, discarding any previous one
if [ -d kallisto ]
then
    rm -rf kallisto
fi
mkdir kallisto
kallisto index --index=kallisto/transcripts.idx cdna.fa
kallisto index --index=kallisto/transcripts.idx "$fasta"
unset fasta
View
@@ -0,0 +1 @@
# Kill all LSF jobs of the current user (`bkill` treats job ID 0 as "all jobs")
bkill 0
@@ -0,0 +1,5 @@
# Short aliases for interactive use
alias e="exit"
# Runs the `seqcloud` command with the `interactive` argument
alias i="seqcloud interactive"
# NOTE(review): lowercase `-cf` selects ctime ordering / unsorted listing;
# possibly `-CF` (columns + type indicators) was intended -- confirm
alias l="ls -cf"
# Include entries whose names start with a dot
alias la="ls -a"
# Long listing format
alias ll="ls -l"
View
@@ -0,0 +1,21 @@
# Enable color support: when `dircolors` is installed, load the color database
# (the user's ~/.dircolors if readable, the defaults otherwise) and make the
# listing and grep commands colorize their output automatically
if [ -x /usr/bin/dircolors ]
then
    test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)"
    for _cmd in dir egrep fgrep grep ls vdir
    do
        alias "${_cmd}=${_cmd} --color=auto"
    done
    unset _cmd
fi
# histappend:   append to the history file, don't overwrite it
# checkwinsize: check the window size after each command and, if necessary,
#               update the values of LINES and COLUMNS
shopt -s histappend checkwinsize
# Make `less` more friendly for non-text input files, see `lesspipe(1)`
test -x /usr/bin/lesspipe && eval "$(SHELL=/bin/sh lesspipe)"
@@ -0,0 +1,35 @@
# Orchestra-specific interactive setup. The cluster is recognized by the
# presence of three of its file systems.
if [ -d /groups/bcbio/ ] && [ -d /n/data1/ ] && [ -d /n/scratch2/ ]
then
    # Flag recording that the Orchestra file systems were found
    orchestra=true
    # If not running interactively, don't do anything
    [ -z "$PS1" ] && return
    # Set variable identifying the `chroot` you work in (used in the prompt below)
    if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]
    then
        debian_chroot=$(cat /etc/debian_chroot)
    fi
    # Set a fancy prompt (non-color, unless we know we "want" color)
    color_prompt=
    case "$TERM" in
        xterm-color) color_prompt=yes;;
    esac
    # Only probe the terminal when $TERM did not already settle the question.
    # Previously the probe's else-branch unconditionally cleared the value
    # chosen by the case statement above, making that statement dead code.
    # We have color support; assume it's compliant with Ecma-48 (ISO/IEC-6429).
    # (Lack of such support is extremely rare, and such a case would tend to
    # support setf rather than setaf.)
    if [ -z "$color_prompt" ] && [ -x /usr/bin/tput ] && tput setaf 1 >/dev/null 2>&1
    then
        color_prompt=yes
    fi
    if [ "$color_prompt" = yes ]
    then
        PS1="${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "
    else
        PS1="${debian_chroot:+($debian_chroot)}\u@\h:\w\$ "
    fi
    unset color_prompt
fi
@@ -0,0 +1,9 @@
# https://conda.io/miniconda.html
# Put the bin directory of each installed Miniconda at the front of PATH.
# miniconda3 is handled last, so it ends up first when both are installed.
for _conda_home in "$HOME/miniconda2" "$HOME/miniconda3"
do
    if [ -d "$_conda_home" ]
    then
        export PATH="$_conda_home/bin:$PATH"
    fi
done
unset _conda_home
@@ -0,0 +1,27 @@
# Load the environment modules used on Orchestra (only when the `orchestra`
# flag is set to true)
if [[ $orchestra == true ]]
then
    # Modules are loaded one by one, in the order listed
    _orchestra_modules=(
        # Unload everything
        null
        # dev
        dev/boost-1.57.0
        dev/compiler/cmake-3.3.1
        dev/compiler/llvm-3.8.0
        dev/gcc-4.8.5
        # dev/gcc-5.2.0
        dev/java/jdk1.8
        dev/lapack
        dev/leiningen/stable-feb032016
        dev/openblas/0.2.14
        dev/openssl/1.0.1
        dev/python/2.7.10
        # dev/python/3.4.2
        dev/ruby/2.2.4
        # seq
        seq/bcl2fastq/2.17.1.14
        seq/sratoolkit/2.8.1
        # stats
        stats/R/3.3.1
    )
    for _module in "${_orchestra_modules[@]}"
    do
        module load "$_module"
    done
    unset _orchestra_modules _module
fi
@@ -0,0 +1,7 @@
# bcbio-nextgen: https://bcbio-nextgen.readthedocs.io
# When the bcbio installation is present, put its bin directory first on PATH
# and clear the Python location overrides
if [[ -d /opt/bcbio/centos/bin ]]
then
    PATH="/opt/bcbio/centos/bin:${PATH}"
    export PATH
    unset PYTHONHOME PYTHONPATH
fi
@@ -0,0 +1,8 @@
# Prefer programs installed in the user's own ~/bin
if [ -d "$HOME/bin" ]
then
    export PATH="$HOME/bin:$PATH"
fi
# Let the dynamic linker find libraries installed in the user's own ~/lib
if [ -d "$HOME/lib" ]
then
    # Append the previous value only when there is one: a trailing ":" would
    # leave an empty entry in LD_LIBRARY_PATH, which the dynamic linker treats
    # as the current directory
    export LD_LIBRARY_PATH="$HOME/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
fi
@@ -1,4 +1,3 @@
which python
which virtualenv
# /opt/python-3.4.2
virtualenv python --system-site-packages

This file was deleted.

Oops, something went wrong.
View

This file was deleted.

Oops, something went wrong.
@@ -0,0 +1,4 @@
# Rename every `*.fq` file in the current directory to `*.fastq`
for file in *.fq
do
    # When nothing matches, the pattern is left as the literal string "*.fq";
    # skip it rather than asking mv to rename a file that does not exist
    [ -e "$file" ] || continue
    # Quote the destination so names containing spaces stay a single argument;
    # `--` protects names that begin with a dash
    mv -- "$file" "${file%.fq}.fastq"
done
View
@@ -0,0 +1 @@
# Recursively delete every directory below the current one whose name matches
# the pattern given as the second argument.
# "$2" is quoted so the pattern reaches find intact instead of being expanded
# by the shell against the current directory; -prune stops find from
# descending into a directory that is about to be removed.
find . -type d -name "$2" -prune -exec rm -rf -- {} +
View
@@ -0,0 +1,4 @@
# Convert aligned/NAME.Aligned.out.sam to aligned/NAME.bam for every NAME
# listed in the file `files` (names separated by any whitespace).
# The names are read into an array so they are split on whitespace exactly as
# before, but are no longer subject to pathname expansion.
samples=()
# `read -d ''` reaches end-of-file by design and then reports failure, hence `|| true`
read -r -d '' -a samples < files || true
for file in "${samples[@]}"
do
    samtools view -bS aligned/"$file".Aligned.out.sam -o aligned/"$file".bam
done
unset samples
View
@@ -0,0 +1 @@
# Report Lustre quota and usage for /n/scratch2 (-h: human-readable sizes)
lfs quota -h /n/scratch2
View
@@ -0,0 +1,6 @@
# Create a user folder on the Orchestra scratch disk
# Requires an eCommons user identifier
# $1: eCommons user identifier, used as the folder name
scratch="/n/scratch2"
if [ -n "$1" ]
then
    mkdir -p "$scratch"/"$1"
    # Restrict the folder to its owner
    chmod 700 "$scratch"/"$1"
    # Convenience link from the home directory
    ln -s "$scratch"/"$1" ~/scratch
else
    # Without an identifier the commands above would operate on $scratch
    # itself (chmod 700 and a link to the whole scratch disk)
    echo "An eCommons user identifier is required" >&2
fi
View
@@ -0,0 +1 @@
# Change to the snapshot directory in the home folder.
# NOTE(review): the directory was spelled `.snapsnot`, which looks like a typo
# for `.snapshot` -- confirm the name used on the target file system
cd ~/.snapshot
View
@@ -1,21 +1,34 @@
if [ ! -d sam ]; then
mkdir sam
# This assumes the following data structure:
# - fastq
# - genome (STAR genome dir, preferable to symlink)
# - sam
# Settings, overridable by position:
#   $1  LSF queue to submit to             (default: mcore)
#   $2  number of cores / STAR threads     (default: 12)
#   $3  STAR genome directory              (default: star)
# Each setting falls back to its default when its argument is omitted or
# empty, so passing only a queue no longer blanks the other two values
queue="${1:-mcore}"
cores="${2:-12}"
genomeDir="${3:-star}"
cd fastq
for file in `ls *.fastq.gz`; do
base=`basename $file .fastq.gz`
for fastq in $(ls fastq/*.fastq.gz)
do
base=$(basename "$fastq" .fastq.gz)
# Skip second paired file in loop for simplicity
if [[ ! $base == *"_2" ]]; then
if [[ $base == *"_1" ]]; then
base=`basename $base _1`
if [[ ! "$base" == *"_2" ]]
then
if [[ "$base" == *"_1" ]]
then
base=$(basename "$base" _1)
echo "$base (paired)"
file="${base}_1.fastq.gz ${base}_2.fastq.gz"
fastq="fastq/${base}_1.fastq.gz fastq/${base}_2.fastq.gz"
else
echo "$base (single)"
fi
if [ ! -d ../sam/$base ]; then
mkdir ../sam/$base
bsub -q priority -W 1:00 -n 12 STAR --genomeDir=../../../genome/STAR --outFileNamePrefix=../sam/$base/ --readFilesCommand=zcat --readFilesIn=$file --runThreadN=12 --outFilterType=BySJout --outFilterMultimapNmax=20 --alignSJoverhangMin=8 --alignSJDBoverhangMin=1 --outFilterMismatchNmax=999 --alignIntronMin=20 --alignIntronMax=1000000 --alignMatesGapMax=1000000
if [ ! -d sam/"$base" ]
then
mkdir -p sam/"$base"
bsub -q "$queue" -W 1:00 -n "$cores" STAR --genomeDir="$genomeDir"/ --outFileNamePrefix=sam/"$base"/ --readFilesCommand=zcat --readFilesIn="$fastq" --runThreadN="$cores" --outFilterType=BySJout --outFilterMultimapNmax=20 --alignSJoverhangMin=8 --alignSJDBoverhangMin=1 --outFilterMismatchNmax=999 --alignIntronMin=20 --alignIntronMax=1000000 --alignMatesGapMax=1000000
fi
fi
done
View
@@ -1,10 +1,16 @@
# Ideally, use symlinks with these defaults or you can rename
FASTA="dna.fa"
GTF="gtf"
if [ -d STAR ]; then
rm -rf STAR
# Settings, overridable by position:
#   $1  directory to build the STAR index in   (default: star)
#   $2  genome FASTA file                       (default: genome.fasta)
#   $3  GTF annotation file                     (default: genome.gtf)
# Previously the FASTA file was taken from $1 (the index directory) and the
# GTF from $2. Each setting keeps its default when its argument is omitted.
genomeDir="${1:-star}"
genomeFastaFiles="${2:-genome.fasta}"
sjdbGTFfile="${3:-genome.gtf}"
mkdir STAR
cd STAR
STAR --runMode=genomeGenerate --genomeDir=. --genomeFastaFiles=../dna.fa --sjdbGTFfile=../gtf
# Rebuild the STAR genome index from scratch: remove any existing index
# directory, recreate it, then generate the index into it
if [ -d "$genomeDir" ]
then
    rm -rf -- "$genomeDir"
fi
mkdir -p "$genomeDir"
# Use the variables this script actually defines (genomeFastaFiles and
# sjdbGTFfile); `$fasta` and `$gtf` are never set, so STAR was being given
# empty file names
STAR --runMode=genomeGenerate --genomeDir="$genomeDir" --genomeFastaFiles="$genomeFastaFiles" --sjdbGTFfile="$sjdbGTFfile"
Oops, something went wrong.

0 comments on commit 376ce2e

Please sign in to comment.