# Analyze 2018 Course Data to Counts

## Load Variables and Make Directories

In [None]:
# Load the course configuration. The variables used below ($STAR_OUT,
# $GENOME_DIR, $FINAL_COUNTS) are presumably defined there -- confirm
# against star_2018_course_config.sh.
source star_2018_course_config.sh
# Uncomment to wipe earlier output before a re-run (destructive). The :?
# guard aborts instead of expanding to an empty path if CUROUT is unset.
# rm -rf "${CUROUT:?}"
# Create the output directories. Quoting keeps each path a single argument
# even if it contains spaces or glob characters.
mkdir -p "$STAR_OUT" "$GENOME_DIR" "$FINAL_COUNTS"

## Download Genome and Annotation

In [None]:
# Download the genome FASTA and the GTF annotation into $GENOME_DIR.
# The URLs and the target directory are quoted so that characters such as
# '?', '*' or spaces are passed to wget verbatim instead of being split or
# glob-expanded by the shell.
for CUR in "$FA_URL" "$GTF_URL" ; do
    wget --directory-prefix "${GENOME_DIR}" "${CUR}"
done

In [None]:
# Decompress the downloaded annotation and genome in place.
# --force lets gunzip proceed even when the decompressed file already
# exists (e.g. when this cell is re-run after a fresh download).
gunzip --force "${GENOME_DIR}/${GTF}.gz"
gunzip --force "${GENOME_DIR}/${FA}.gz"

## Index Genome

In [None]:
# Build the STAR genome index in $GENOME_DIR from the decompressed FASTA,
# using the GTF to add annotated splice junctions. STAR's own log files for
# this step are written with the prefix ${STAR_OUT}/genome_.
#
# --genomeSAindexNbases 6 is well below STAR's default of 14; the STAR
# manual says this value must be scaled down for small genomes.
# NOTE(review): presumably the course genome is very small -- confirm the
# value against the actual FASTA length before reusing this with another
# genome.
STAR \
    --runThreadN "$THREADS" \
    --runMode genomeGenerate \
    --genomeDir "$GENOME_DIR" \
    --genomeFastaFiles "${GENOME_DIR}/${FA}" \
    --sjdbGTFfile "${GENOME_DIR}/${GTF}" \
    --outFileNamePrefix "${STAR_OUT}/genome_" \
    --sjdbGTFfeatureExon exon \
    --sjdbGTFtagExonParentTranscript transcript_id \
    --sjdbGTFtagExonParentGene gene_id \
    --genomeSAindexNbases 6

## Trim and Map Reads

In [None]:
# Trim one FASTQ file with fastq-mcf and align the trimmed reads with STAR,
# streaming the trimmed reads through a named pipe so that no intermediate
# trimmed FASTQ is written to disk.
#
# Arguments:
#   $1  path to a FASTQ file; the sample name is the file name with the
#       "_R1_001.fastq.gz" suffix removed (a file without that suffix keeps
#       its full file name as the sample name).
# Reads from the environment: ADAPTERS, THREADS, GENOME_DIR, STAR_OUT.
# Output: STAR writes its files with the prefix ${STAR_OUT}/<sample>_.
# Returns: 0 if both fastq-mcf and STAR succeeded; otherwise the exit
#   status of STAR if it failed, else that of fastq-mcf. 1 is returned if
#   the temporary pipe could not be created.
trim_and_star_func() {
    local FASTQ=$1
    local FASTQ_BASE=${FASTQ##*/}                # strip directory from file path
    local SAMPLE=${FASTQ_BASE%_R1_001.fastq.gz}  # strip the _R1_001.fastq.gz suffix
    echo "$SAMPLE"
    echo "$FASTQ"
    echo "$FASTQ_BASE"

    # Make a pipe for the trimmed fastq inside a private temporary directory.
    # mktemp -d actually creates (and thereby reserves) the directory, unlike
    # `mktemp --dry-run`, which only prints a name that may be taken by
    # another process before it is used.
    local PIPE_DIR CUR_PIPE
    PIPE_DIR=$(mktemp -d) || return 1
    CUR_PIPE=${PIPE_DIR}/${SAMPLE}_pipe.fq
    if ! mkfifo "$CUR_PIPE"; then
        rm -rf -- "$PIPE_DIR"
        return 1
    fi

    # Run fastq-mcf in the background, writing trimmed reads into the pipe.
    fastq-mcf \
        "$ADAPTERS" \
        "$FASTQ" \
        -o "$CUR_PIPE" \
        -q 20 -x 0.5 &
    local TRIM_PID=$!

    # Align the reads arriving on the pipe. The genome is taken from shared
    # memory (LoadAndKeep), so it is expected to have been loaded beforehand.
    local STAR_STATUS=0
    STAR \
        --runMode alignReads \
        --runThreadN "$THREADS" \
        --genomeDir "$GENOME_DIR" \
        --outSAMtype BAM SortedByCoordinate \
        --limitBAMsortRAM 1280000000 \
        --quantMode GeneCounts \
        --genomeLoad LoadAndKeep \
        --twopassMode None \
        --outFileNamePrefix "${STAR_OUT}/${SAMPLE}_" \
        --alignIntronMax 5000 \
        --outSJfilterIntronMaxVsReadN 500 1000 2000 \
        --readFilesIn "$CUR_PIPE" || STAR_STATUS=$?

    # If STAR died, the trimmer may be blocked writing to a pipe nobody
    # reads; stop it so it is not left behind as an orphan.
    if [ "$STAR_STATUS" -ne 0 ]; then
        kill "$TRIM_PID" 2>/dev/null
    fi
    # Reap the trimmer and record whether it succeeded.
    local TRIM_STATUS=0
    wait "$TRIM_PID" || TRIM_STATUS=$?

    rm -rf -- "$PIPE_DIR"

    if [ "$STAR_STATUS" -ne 0 ]; then
        return "$STAR_STATUS"
    fi
    return "$TRIM_STATUS"
}
# Export the function so that shells started by `parallel` can call it.
export -f trim_and_star_func

# Load the genome index into shared memory once, so that the per-sample
# STAR runs (which use --genomeLoad LoadAndKeep) can share it.
STAR --genomeDir "$GENOME_DIR" \
    --outFileNamePrefix "${STAR_OUT}/genomeload_" \
    --genomeLoad LoadAndExit

# Trim and map every *.fastq.gz file in $RAW_FASTQS, at most $MAX_JOBS at a
# time.
# NOTE(review): trim_and_star_func derives the sample name by stripping
# "_R1_001.fastq.gz"; files matched here that have a different suffix keep
# their whole file name as the sample name -- confirm that the directory
# only holds *_R1_001.fastq.gz files.
parallel --jobs "$MAX_JOBS" trim_and_star_func ::: "$RAW_FASTQS"/*.fastq.gz
# Alternative: restrict the run to a subset of samples.
# parallel --jobs "$MAX_JOBS" trim_and_star_func ::: "$RAW_FASTQS"/A_[1-2]_*.fastq.gz

# Release the genome from shared memory once all samples are done.
STAR --genomeDir "$GENOME_DIR" \
    --outFileNamePrefix "${STAR_OUT}/genomeremove_" \
    --genomeLoad Remove

In [None]:
# Move STAR's per-gene count tables into $FINAL_COUNTS.
# $DATA_BASE is made writable for the owner first and write-protected again
# (recursively, for everyone) at the end.
# NOTE(review): this presumes $FINAL_COUNTS lives under $DATA_BASE -- confirm
# against the config file.
chmod u+w "$DATA_BASE"
mkdir -p "$FINAL_COUNTS"
chmod -R u+w "$FINAL_COUNTS"
mv "$STAR_OUT"/*_ReadsPerGene.out.tab "$FINAL_COUNTS"
chmod -R a-w "$DATA_BASE"

In [None]:
# Move STAR's coordinate-sorted BAM files into $FINAL_BAMS.
# $DATA_BASE is made writable for the owner first and write-protected again
# (recursively, for everyone) at the end.
# NOTE(review): this presumes $FINAL_BAMS lives under $DATA_BASE -- confirm
# against the config file.
chmod u+w "$DATA_BASE"
mkdir -p "$FINAL_BAMS"
chmod -R u+w "$FINAL_BAMS"
mv "$STAR_OUT"/*_Aligned.sortedByCoord.out.bam "$FINAL_BAMS"
chmod -R a-w "$DATA_BASE"