In [74]:
import jpy_tools.parseSnake as jps

In [75]:
snakeFile = jps.SnakeMakeFile()

In [76]:
snakeHeader = jps.SnakeHeader(snakeFile, '/public/home/liuzj/scripts/pipeline/analyzeNGSData/snakemake/config.yaml')
snakeHeader.addFeature('rawDataPath')
snakeHeader.generateContent()

config contents:

     resultDir      :/public/home/liuzj/scripts/pipeline/analyzeNGSData/results/

    pipelineDir     :/public/home/liuzj/scripts/pipeline/analyzeNGSData/scripts/

    rawDataPath     :/public/home/liuzj/scripts/pipeline/analyzeNGSData/rawData/

     sampleList     :['Total2', 'Total3', 'Total4']

       genome       :/public/home/liuzj/data/Araport11/HISAT2INDEX

      annoGtf       :/public/home/liuzj/data/Araport11/gene.gtf

      annoGff       :/public/home/liuzj/data/Araport11/gene.gff

     repreTrans     :/public/home/liuzj/data/Araport11/represent.proteinIdPos.txt



In [114]:
fastpQC = jps.SnakeRule(snakeFile, 'fastpQC', 0, 16)
fastpQC.setInput(
    c = dict(
        readOne = 'f\'{rawDataPath}{{sample}}_1.fastq.gz\'', \
        readTwo = 'f\'{rawDataPath}{{sample}}_2.fastq.gz\'')
)
fastpQC.setOutput(
    a = ['{{sample}}_AfterQC_1.fastq.gz', '{{sample}}_AfterQC_2.fastq.gz']
)
fastpQC.setParams(
    b = ['useQC'] , a = ['{{sample}}_qcReport.html']
)
fastpQC.setShell("""
if [ {params.useQC} == True ]
then
    fastp -i {input.readOne} -o {output.AfterQC_1} -I {input.readTwo} -O {output.AfterQC_2} -w {threads} -h {params.qcReport}
else
    cp {input.readOne} {output.AfterQC_1} && cp {input.readTwo} {output.AfterQC_2}
fi
""")
fastpQC.generateContent()

In [115]:
mappingUsingHisat = jps.SnakeRule(snakeFile, 'mappingUsingHisat', 1, 16)
mappingUsingHisat.setInput(
    a = ['{{sample}}_AfterQC_1.fastq.gz', '{{sample}}_AfterQC_2.fastq.gz']
)
mappingUsingHisat.setOutput(a = ['{{sample}}_sortedBam.bam'])
mappingUsingHisat.setParams(a = ['{{sample}}_unsortedSam.sam'], b=['genome'])
mappingUsingHisat.setShell("""
hisat2 -x {params.genome} -p {threads} --min-intronlen 20 --max-intronlen 10000 --dta --time -1 {input.AfterQC_1} -2 {input.AfterQC_2} -S {params.unsortedSam} &&\
     samtools sort {params.unsortedSam} -@ {threads} -O bam -o {output.sortedBam} &&\
         samtools index {output.sortedBam} &&\
             rm {params.unsortedSam}
""")
mappingUsingHisat.generateContent()

deDuplicates = jps.SnakeRule(snakeFile, 'deDuplicates', 2, 8)
deDuplicates.setInput(a = ['{{sample}}_sortedBam.bam'])
deDuplicates.setOutput(a = ['{{sample}}_sortedDedupBam.bam'])
deDuplicates.setParams(a = ['{{sample}}_sortedDedupMat.txt'])
deDuplicates.setShell("""
java -jar /public/apps/picard_2.20.2/picard.jar MarkDuplicates VALIDATION_STRINGENCY=SILENT REMOVE_DUPLICATES=true SORTING_COLLECTION_SIZE_RATIO=0.01 I={input.sortedBam} O={output.sortedDedupBam} M={params.sortedDedupMat} &&\
    samtools index {output.sortedDedupBam}
""")
deDuplicates.generateContent()

In [116]:
runStringTie = jps.SnakeRule(snakeFile, 'runStringTie', 3, 16)
runStringTie.setInput(a = ['{{sample}}_sortedDedupBam.bam'], b = ['annoGff'])
runStringTie.setOutput(a = ['{{sample}}/geneStringtieMerge.gtf', '{{sample}}/geneStringtieAbund.tsv'])
runStringTie.setShell("""
stringtie -A {output.geneStringtieAbund} -e --rf -B -p {threads} -G {input.annoGff} -o {output.geneStringtieMerge} {input.sortedDedupBam}
""")
runStringTie.generateContent()

In [117]:
extractRnaRpkm = jps.SnakeRule(snakeFile, 'extractRnaRpkm', 4, 1)
extractRnaRpkm.setInput(a = ['{{sample}}/geneStringtieMerge.gtf'])
extractRnaRpkm.setOutput(a = ['{{sample}}_rpkmRna.tsv', '{{sample}}_rpkmGene.tsv'])
extractRnaRpkm.setParams(c = dict(step4Label = '\'{sample}\''),a = ['step3ResultDir'])
extractRnaRpkm.setShell("""
Rscript extract_rpkm_from_ballgown.R {params.step4Label}  {params.step3ResultDir}{params.step4Label} {output.rpkmRna} {output.rpkmGene} 
""")
extractRnaRpkm.generateContent()

In [118]:
extractGeneCounts = jps.SnakeRule(snakeFile, 'extractGeneCounts', 5, 1)
extractGeneCounts.setInput(c = dict(\
    allRnaRpkm = "[f\"{config['resultDir']}step4_extractRnaRpkm/{sample}_rpkmRna.tsv\" for sample in config['sampleList']]"
    ))
extractGeneCounts.setOutput(a = ['allSampleGeneCounts.csv', 'allSampleTransCounts.csv'])
extractGeneCounts.setParams(a = ['step3ResultDir'])
extractGeneCounts.setShell("""
prepDE.py -g {output.allSampleGeneCounts} -t {output.allSampleTransCounts} -i {params.step3ResultDir}
""")
extractGeneCounts.generateContent()

In [119]:
calIrRatio = jps.SnakeRule(snakeFile, 'calIrRatio', 6, 1)
calIrRatio.setInput(a = ['{{sample}}_sortedDedupBam.bam'], b= ['repreTrans'])
calIrRatio.setOutput(a = ['{{sample}}_irRatio.tsv'])
calIrRatio.setShell("""
python cal_ir_ratio.from_bam.by_splicing_Read_type.version2.py {input.sortedDedupBam} {input.repreTrans} {output.irRatio} 1 4 1
""")
calIrRatio.generateContent()

In [120]:
gatherIrRpkm = jps.SnakeRule(snakeFile, 'gatherIrRpkm', 7, 1)
gatherIrRpkm.setInput(a = ['{{sample}}_irRatio.tsv', '{{sample}}_rpkmGene.tsv'])
gatherIrRpkm.setOutput(a = ['{{sample}}_gatherIrRpkm.tsv'])
gatherIrRpkm.setShell("""
python gatherIrRpkm.py -i {input.irRatio} -R {input.rpkmGene} -o {output.gatherIrRpkm}
""")
gatherIrRpkm.generateContent()

In [121]:
ruleAll = jps.SnakeAll(snakeFile)
ruleAll.generateContent(gatherIrRpkm = 1, allSampleGeneCounts = 0)

In [122]:
snakeFile.generateContent('/public/home/liuzj/scripts/pipeline/analyzeNGSData/snakemake/snakefile')

configfile: "/public/home/liuzj/scripts/pipeline/analyzeNGSData/snakemake/config.yaml"
pipelineDir = config['pipelineDir']
rawDataPath = config['rawDataPath']


rule all:
    input:
        [f"{config['resultDir']}step7_gatherIrRpkm/{sample}_gatherIrRpkm.tsv" for sample in config['sampleList']],
        allSampleGeneCounts = f"{config['resultDir']}step5_extractGeneCounts/allSampleGeneCounts.csv"

rule fastpQC:
    input:
        readOne = f'{rawDataPath}{{sample}}_1.fastq.gz',
        readTwo = f'{rawDataPath}{{sample}}_2.fastq.gz'
    output:
        AfterQC_1 = f"{config['resultDir']}step0_fastpQC/{{sample}}_AfterQC_1.fastq.gz",
        AfterQC_2 = f"{config['resultDir']}step0_fastpQC/{{sample}}_AfterQC_2.fastq.gz"
    params:
        useQC = config['useQC'],
        {{sample}}_qcReport.html = config['{{sample}}_qcReport.html'],
        gpu = "0"

    threads:16
    shell:
        """
cd {pipelineDir}
if [ {params.useQC} == True ]
then
    fastp -i {input.readOne} -o {output.AfterQC_