In [10]:
from jpy_tools import parseSnake as jps

In [11]:
# Locations used by the rest of the notebook:
#   snakePath - passed to snakeFile.generateContent() in the last cell
#   yamlPath  - pipeline config handed to SnakeHeader
snakePath = '/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/snakemake/snakefile'
yamlPath = '/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/snakemake/config20200715.yaml'

In [12]:
# Shared snakefile object: the header, every rule and `rule all` below are each
# constructed with `snakeFile` as their first argument.
snakeFile = jps.SnakeMakeFile()

In [13]:
# Build the snakefile header from the YAML config at `yamlPath`.
# generateContent() prints the "config contents" listing shown in this cell's output.
# NOTE(review): presumably this is also what contributes the `configfile:` and
# `pipelineDir = ...` lines of the final snakefile - confirm in jpy_tools.
header = jps.SnakeHeader(snakeFile, yamlPath)
header.generateContent()

config contents:

    pipelineDir     :/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/scripts/

     resultDir      :/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/results/

  molMappingResult  :/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/rawData/molMappingResult.bam

    refAnnotaBed    :/public/home/liuzj/data/Araport11/gene.bed

   repreAnnotaBed   :/public/home/liuzj/data/Araport11/araport11.representative.gene_model.bed



In [14]:
# Step 1: intersect the mapped-read BAM with the reference annotation BED.
# Inputs given through `b=` are rendered as config['...'] lookups in the generated
# rule; the `a=` output is rendered as a file under resultDir/step1_<rule name>/.
extractOverlapWithAnno = jps.SnakeRule(snakeFile, 'extractOverlapWithAnno', 1)
extractOverlapWithAnno.setInput(b=['molMappingResult', 'refAnnotaBed'])
extractOverlapWithAnno.setOutput(a=['molOverlapResult.bed'])
extractOverlapWithAnno.setShell("""
bedtools intersect -abam {input.molMappingResult} -b {input.refAnnotaBed} -wo -s -split -bed > {output.molOverlapResult}
""")
extractOverlapWithAnno.generateContent()

In [15]:
# Step 2: run step11_parseBedtoolsOutput.py on the step-1 overlap file.
# The `a=` input is rendered as the path of step 1's molOverlapResult.bed output,
# and the `a=` output as parseBedtoolsResult.pkl under this step's result directory.
parseBedtoolsOutput = jps.SnakeRule(snakeFile, 'parseBedtoolsOutput', 2)
parseBedtoolsOutput.setInput(a=['molOverlapResult.bed'])
parseBedtoolsOutput.setOutput(a=['parseBedtoolsResult.pkl'])
parseBedtoolsOutput.setShell("""
python step11_parseBedtoolsOutput.py -i {input.molOverlapResult} -o {output.parseBedtoolsResult}
""")
parseBedtoolsOutput.generateContent()

In [16]:
# Step 3: intersect the mapped-read BAM with the representative gene-model BED, then
# feed that overlap file to step13_getSpliceStats.py.
#
# Both inputs are config keys, so they are declared through `b=` exactly as in the
# extractOverlapWithAnno rule (where `b=` entries render as config['...'] lookups).
# No rule in this notebook produces a "molMappingResult.bam" output, so the BAM must
# not be declared as an `a=` (earlier-rule output) input.
#
# NOTE(review): "getSplieStats" and "intronRetationInfo" look like typos of
# "Splice" / "Retention". They are kept as-is because they determine the generated
# rule name and output paths, which the downstream rule refers to.
getSplieStats = jps.SnakeRule(snakeFile, 'getSplieStats', 3)
getSplieStats.setInput(b=['molMappingResult', 'repreAnnotaBed'])
getSplieStats.setOutput(a=['overlapWithRefBed.bed', 'intronRetationInfo.tsv'])
# The backslash at the end of the first command sits inside a non-raw Python string,
# so Python drops it together with the newline: both commands reach the shell as a
# single line joined by `&&`.
getSplieStats.setShell("""
bedtools intersect -abam {input.molMappingResult} -b {input.repreAnnotaBed} -wo -s -split -bed > {output.overlapWithRefBed} &&\
    python step13_getSpliceStats.py -i {output.overlapWithRefBed} -o {output.intronRetationInfo}
""")
getSplieStats.generateContent()

# Step 4: run step14_getIrInfo.py on the step-2 pickle (-g) and the step-3
# intronRetationInfo.tsv (-i); the result is declared as irInfo.tsv.
getIrInfo = jps.SnakeRule(snakeFile, 'getIrInfo', 4)
getIrInfo.setInput(a=['parseBedtoolsResult.pkl', 'intronRetationInfo.tsv'])
getIrInfo.setOutput(a=['irInfo.tsv'])
getIrInfo.setShell("""
python step14_getIrInfo.py -i {input.intronRetationInfo} -g {input.parseBedtoolsResult} -o {output.irInfo}
""")
getIrInfo.generateContent()

# Step 5: run step15_calculateGeneIntronRatio.py on the step-4 irInfo.tsv.
# Its irRatio.tsv output is the file that `rule all` requests in the generated snakefile.
calculateGeneIntronRatio = jps.SnakeRule(snakeFile, 'calculateGeneIntronRatio', 5)
calculateGeneIntronRatio.setInput(a=['irInfo.tsv'])
calculateGeneIntronRatio.setOutput(a=['irRatio.tsv'])
calculateGeneIntronRatio.setShell("""
python step15_calculateGeneIntronRatio.py -i {input.irInfo} -o {output.irRatio}
""")
calculateGeneIntronRatio.generateContent()

In [17]:
# Declare `rule all`. In the generated snakefile its only input is `irRatio`,
# rendered as step 5's irRatio.tsv under resultDir.
# NOTE(review): the meaning of the value 0 passed for `irRatio` is not visible in
# this notebook - confirm against jpy_tools.parseSnake.SnakeAll.
ruleAll = jps.SnakeAll(snakeFile)
ruleAll.generateContent(irRatio=0) 

In [18]:
# Render the assembled snakefile; the generated text is echoed in this cell's output.
# NOTE(review): presumably the same text is also written to `snakePath` - confirm in jpy_tools.
snakeFile.generateContent(snakePath)

configfile: "/public/home/liuzj/scripts/pipeline/calcIrRatioNanopore/snakemake/config20200715.yaml"
pipelineDir = config['pipelineDir']


rule all:
    input:
        irRatio = f"{config['resultDir']}step5_calculateGeneIntronRatio/irRatio.tsv"

rule extractOverlapWithAnno:
    input:
        molMappingResult = config['molMappingResult'],
        refAnnotaBed = config['refAnnotaBed']
    output:
        molOverlapResult = f"{config['resultDir']}step1_extractOverlapWithAnno/molOverlapResult.bed"
    params:
        gpu = "0"

    threads:1
    shell:
        """
cd {pipelineDir}
bedtools intersect -abam {input.molMappingResult} -b {input.refAnnotaBed} -wo -s -split -bed > {output.molOverlapResult}
        """

rule parseBedtoolsOutput:
    input:
        molOverlapResult = f"{config['resultDir']}step1_extractOverlapWithAnno/molOverlapResult.bed"
    output:
        parseBedtoolsResult = f"{config['resultDir']}step2_parseBedtoolsOutput/parseBedtoolsResult.pkl"
    params:
        gpu = "0