In [1]:
import jpy_tools.parseSnake as jps

In [2]:
# Absolute path of the pipeline's YAML config; passed to jps.SnakeHeader below.
configPath = (
    '/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/'
    'config.yaml'
)

In [9]:
# Path handed to snakefile.generateContent(...) once all rules are declared.
snakefilePath = (
    '/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/'
    'snakefile'
)

In [10]:
# Shared object passed as the first argument to every jps.Snake* helper in this
# notebook; snakefile.generateContent(snakefilePath) is called on it at the end.
snakefile = jps.SnakeMakeFile()

In [11]:
# Build the Snakefile header from the config path.
# NOTE(review): presumably this is what produces the `configfile:` and
# `pipelineDir = config['pipelineDir']` lines seen in the rendered output --
# confirm against jpy_tools.parseSnake.
snakeHeader = jps.SnakeHeader(snakefile, configPath)
snakeHeader.generateContent()

In [12]:
# Step 1: run `cellranger count` on the raw FASTQ directory and drop a flag file
# when it succeeds.
# The backslash-newline pairs inside the (non-raw) triple-quoted string are
# consumed by Python, so the command chain reaches Snakemake as a single line.
runCellRangerCommand = """
mkdir {params.cellRangerDir} && \
    cd {params.cellRangerDir} && \
        cellranger count --id=cellranger --fastqs={input.inputFastqDir} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 && \
            touch {output.step1Finished}
"""

# NOTE(review): the positional arguments after the rule name are not documented
# in this notebook; the rendered rule shows `threads:64`, so the trailing 64 is
# presumably the thread count -- confirm against jpy_tools.parseSnake.
runCellRanger = jps.SnakeRule(snakefile, 'runCellRanger', 1, 1, 64)
# Category `b` entries are rendered as config[...] lookups, category `a` entries
# as paths under this step's result directory (see the rendered rule below).
runCellRanger.setInput(b=['inputFastqDir'])
runCellRanger.setOutput(a=['step1Finished.empty'])
runCellRanger.setParams(b=['cellRangerRef'], a=['cellRangerDir/'])
runCellRanger.setShell(runCellRangerCommand)
runCellRanger.generateContent()

In [13]:
# Step 2: split the position-sorted BAM produced by cellranger into chunks.
# NOTE(review): 'step1ResultDir' is declared under category `a` and the shell
# appends '/cellRangerDir/cellranger/outs/...' to it, so it is expected to
# resolve to step 1's result directory. Whether jps resolves that name this way
# is not visible here -- confirm, otherwise the BAM path will not exist.
splitBamCommand = """
python step02_splitBam.py -i {params.step1ResultDir}/cellRangerDir/cellranger/outs/possorted_genome_bam.bam -o {params.splitedDir} -t {params.step01SplitedCounts} &&\
    touch {output.step2Finished}
"""

splitBam = jps.SnakeRule(snakefile, 'splitBam', 2)
splitBam.setInput(a=['step1Finished.empty'])
splitBam.setOutput(a=['step2Finished.empty'])
splitBam.setParams(
    a=['splitedDir/', 'step1ResultDir'],
    c=dict(step01SplitedCounts=64),  # category `c` values are rendered verbatim
)
splitBam.setShell(splitBamCommand)
splitBam.generateContent()

In [14]:


# Step 3: compute overlap information from the BAM chunks written by splitBam.
getOverlapInfo = jps.SnakeRule(snakefile, 'getOverlapInfo', 3, threads=64)
# Depend on splitBam's completion flag (step 2). The previous dependency on
# step1Finished.empty was carried over from the BAM-only pipeline, where
# splitBam is step 1; here it left splitBam unreferenced by any downstream
# rule, so Snakemake would never schedule it and splitedDir could be missing.
getOverlapInfo.setInput(a = ['step2Finished.empty'])
getOverlapInfo.setOutput(a = ['overlapInfo.tsv'])
getOverlapInfo.setParams(a = ['splitedDir/'])
getOverlapInfo.setShell("""
python step03_getOverlapInfo.py -i {params.splitedDir} -o {output.overlapInfo} -t {threads}
""")
getOverlapInfo.generateContent()

# Step 4: derive the useful regions from the overlap table and store them in
# the location given by the `lmdbFile` param.
getUsefulRegion = jps.SnakeRule(snakefile, 'getUsefulRegion', 4, threads=64)
getUsefulRegion.setInput(a = ['overlapInfo.tsv'])
getUsefulRegion.setOutput(a = ['step4Finished.empty'])
getUsefulRegion.setParams(a = ['lmdbFile/'])
# The flag touched by the shell must be the declared output (step4Finished);
# the former {output.step3Finished} named an output this rule does not have.
getUsefulRegion.setShell("""
python step04_getUsefulRegion.py -i {input.overlapInfo} -o {params.lmdbFile} -t {threads} &&\
    touch {output.step4Finished}
""")
getUsefulRegion.generateContent()

# Step 5: extract the reads covering the useful regions from the original
# FASTQ directory, then drop a completion flag.
extractSeqCommand = """
python step05_extractSeq.py -i {input.inputFastqDir} -o {params.usefulRegionFastq} -l {params.lmdbFile} -t {threads} -s &&\
    touch {output.step5Finished}
"""

extractSeq = jps.SnakeRule(snakefile, 'extractSeq', 5, threads=64)
extractSeq.setInput(
    a=['step4Finished.empty'],  # completion flag of getUsefulRegion
    b=['inputFastqDir'],
)
extractSeq.setOutput(a=['step5Finished.empty'])
extractSeq.setParams(a=['lmdbFile/', 'usefulRegionFastq/'])
extractSeq.setShell(extractSeqCommand)
extractSeq.generateContent()

In [None]:
# Step 6: run `cellranger count` again, this time on the FASTQ files extracted
# in step 5.
rerunCellRanger = jps.SnakeRule(snakefile, 'rerunCellRanger', 6, threads=64)
rerunCellRanger.setInput(a = ['step5Finished.empty'])
rerunCellRanger.setOutput(a = ['step6Finished.empty'])
# The param is named 'rerunCellRangerDir/' so that it matches the
# {params.rerunCellRangerDir} placeholder used in the shell (same pattern as
# 'cellRangerDir/' in step 1); it was previously declared as 'rerunCellRanger'.
rerunCellRanger.setParams(a = ['usefulRegionFastq/', 'rerunCellRangerDir/'], b = ['cellRangerRef'])
# Touch this rule's own flag (step6Finished); the copied {output.step1Finished}
# is not an output of this rule.
rerunCellRanger.setShell("""
mkdir {params.rerunCellRangerDir} && \
    cd {params.rerunCellRangerDir} && \
        cellranger count --id=cellranger --fastqs={params.usefulRegionFastq} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 && \
            touch {output.step6Finished}
""")
# Emit the rule like every other rule in this notebook; without this call the
# configured rule was never generated.
rerunCellRanger.generateContent()

In [10]:
# Declare the final target and write the Snakefile out.
# The rendered `rule all` below lists step1Finished as its only input, so this
# call requests step 1 only; later rules are not targeted.
# NOTE(review): the meaning of the value 0 is not visible in this notebook --
# confirm against jpy_tools.parseSnake.
snakeAll = jps.SnakeAll(snakefile)
snakeAll.generateContent(step1Finished = 0)

# Called with the output path defined near the top of the notebook.
snakefile.generateContent(snakefilePath)

configfile: "/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/config.yaml"
pipelineDir = config['pipelineDir']


rule all:
    input:
        step1Finished = f"{config['resultDir']}step1_runCellRanger/step1Finished.empty"

rule runCellRanger:
    input:
        inputFastqDir = config['inputFastqDir']
    output:
        step1Finished = f"{config['resultDir']}step1_runCellRanger/step1Finished.empty"
    params:
        cellRangerRef = config['cellRangerRef'],
        cellRangerDir = f"{config['resultDir']}step1_runCellRanger/cellRangerDir/"
    threads:64
    shell:
        """
cd {pipelineDir}
jpy_qsub.py --sm -t {threads} -n 1 -N runCellRanger --inline '\
mkdir {params.cellRangerDir} &&     cd {params.cellRangerDir} &&         cellranger count --id=cellranger --fastqs={input.inputFastqDir} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 &&             touch {output.step1Finished}'
        """




In [None]:
# Alternative cellranger rule that takes explicit per-sample R1/R2 FASTQ lists.
# NOTE(review): this re-binds `runCellRanger` and reuses the rule name
# 'runCellRanger' with step 1 while its names refer to "stepFive"; it looks like
# a leftover from another pipeline -- confirm it is still wanted before running.
# Fixed: the first argument was `snakeFile`, which is not defined in this
# notebook (the object is `snakefile`), so the cell raised NameError.
runCellRanger = jps.SnakeRule(snakefile, 'runCellRanger', 1, 1, 64)
# Category `c` values are source text for the Snakefile, hence the list
# comprehensions are passed as strings.
runCellRanger.setInput(c = dict(
    allFilteredReadOne = "[f\"{config['inputFastqDir']}{sample}_R1_001.fastq\" for sample in config['sampleList']]",
    allFilteredReadTwo = "[f\"{config['inputFastqDir']}{sample}_R2_001.fastq\" for sample in config['sampleList']]"))
runCellRanger.setOutput(a = ['stepFiveFinished.finished'])
runCellRanger.setParams(a = ['stepFiveTestDir/'], b = ['cellRangerRef', 'filteredDir'],
    c = dict(sampleName = '\'B10XRNA\'', sampleDirName = '\'cellRanger\''))
runCellRanger.setShell("""
mkdir {params.stepFiveTestDir}&& \
cd {params.stepFiveTestDir}&& \
cellranger count --id={params.sampleDirName} --fastqs={params.filteredDir} --sample={params.sampleName} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 &&\
touch {output.stepFiveFinished}
""")
runCellRanger.generateContent()

In [27]:
# BAM-only pipeline, step 1: split the BAM named by config['inputBam'] into
# chunks and drop a completion flag.
splitBamCommand = """
python step01_splitBam.py -i {input.inputBam} -o {params.splitedDir} -t {params.step01SplitedCounts} &&\
    touch {output.step1Finished}
"""

splitBam = jps.SnakeRule(snakefile, 'splitBam', 1)
splitBam.setInput(b=['inputBam'])
splitBam.setOutput(a=['step1Finished.empty'])
splitBam.setParams(
    a=['splitedDir/'],
    c=dict(step01SplitedCounts=64),  # rendered verbatim as `step01SplitedCounts = 64`
)
splitBam.setShell(splitBamCommand)
splitBam.generateContent()

# BAM-only pipeline, step 2: compute overlap information from the chunks in
# splitedDir once splitBam's flag (step1Finished.empty) exists.
getOverlapInfoCommand = """
python step02_getOverlapInfo.py -i {params.splitedDir} -o {output.overlapInfo} -t {threads}
"""

getOverlapInfo = jps.SnakeRule(snakefile, 'getOverlapInfo', 2, threads=64)
getOverlapInfo.setInput(a=['step1Finished.empty'])
getOverlapInfo.setOutput(a=['overlapInfo.tsv'])
getOverlapInfo.setParams(a=['splitedDir/'])
getOverlapInfo.setShell(getOverlapInfoCommand)
getOverlapInfo.generateContent()

# BAM-only pipeline, step 3: turn the overlap table into the useful-region
# store at the `lmdbFile` param, then drop a completion flag.
getUsefulRegionCommand = """
python step03_getUsefulRegion.py -i {input.overlapInfo} -o {params.lmdbFile} -t {threads} &&\
    touch {output.step3Finished}
"""

getUsefulRegion = jps.SnakeRule(snakefile, 'getUsefulRegion', 3, threads=64)
getUsefulRegion.setInput(a=['overlapInfo.tsv'])
getUsefulRegion.setOutput(a=['step3Finished.empty'])
getUsefulRegion.setParams(a=['lmdbFile/'])
getUsefulRegion.setShell(getUsefulRegionCommand)
getUsefulRegion.generateContent()

# BAM-only pipeline, step 4: extract the reads covering the useful regions from
# the FASTQ directory, then drop a completion flag.
extractSeqCommand = """
python step04_extractSeq.py -i {input.inputFastqDir} -o {params.usefulRegionFastq} -l {params.lmdbFile} -t {threads} -s &&\
    touch {output.step4Finished}
"""

extractSeq = jps.SnakeRule(snakefile, 'extractSeq', 4, threads=64)
extractSeq.setInput(
    a=['step3Finished.empty'],  # completion flag of getUsefulRegion
    b=['inputFastqDir'],
)
extractSeq.setOutput(a=['step4Finished.empty'])
extractSeq.setParams(a=['lmdbFile/', 'usefulRegionFastq/'])
extractSeq.setShell(extractSeqCommand)
extractSeq.generateContent()

In [28]:
# Declare the final target for the BAM-only pipeline: the rendered `rule all`
# below lists step4Finished (extractSeq's flag) as its input.
# NOTE(review): the meaning of the value 0 is not visible in this notebook --
# confirm against jpy_tools.parseSnake.
snakeAll = jps.SnakeAll(snakefile)
snakeAll.generateContent(step4Finished = 0)
# Called with the output path defined near the top of the notebook.
snakefile.generateContent(snakefilePath)

configfile: "/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/config.yaml"
pipelineDir = config['pipelineDir']


rule all:
    input:
        step4Finished = f"{config['resultDir']}step4_extractSeq/step4Finished.empty"

rule splitBam:
    input:
        inputBam = config['inputBam']
    output:
        step1Finished = f"{config['resultDir']}step1_splitBam/step1Finished.empty"
    params:
        splitedDir = f"{config['resultDir']}step1_splitBam/splitedDir/",
        step01SplitedCounts = 64
    threads:1
    shell:
        """
cd {pipelineDir}
jpy_qsub.py --sm -t {threads} -n 1 -N splitBam --inline '\
python step01_splitBam.py -i {input.inputBam} -o {params.splitedDir} -t {params.step01SplitedCounts} &&    touch {output.step1Finished}'
        """

rule getOverlapInfo:
    input:
        step1Finished = f"{config['resultDir']}step1_splitBam/step1Finished.empty"
    output:
        overlapInfo = f"{config['resultDir']}step2_getOverlapInfo/overlapInfo.tsv"
  