In [1]:
import jpy_tools.parseSnake as jps

In [2]:
configPath = '/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/config.yaml'

In [3]:
snakefilePath = '/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/snakefile'

In [4]:
snakefile = jps.SnakeMakeFile()

In [5]:
snakeHeader = jps.SnakeHeader(snakefile, configPath)
snakeHeader.generateContent()

config contents:

    pipelineDir     :/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/scripts/

     resultDir      :/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/results/

   inputFastqDir    :/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/rawData/inputFastq/

   cellRangerRef    :/public/home/liuzj/projects/singleCell/00_data/00_endospermUseData/tair10



In [16]:
runCellRanger = jps.SnakeRule(snakefile, 'runCellRanger', )
runCellRanger.setInput(b=['inputFastqDir'])
runCellRanger.setOutput(a=['step1Finished.empty'])
runCellRanger.setParams(b = ['cellRangerRef'], a = ['cellRangerDir/'])
runCellRanger.setShell("""
mkdir {params.cellRangerDir} && cd {params.cellRangerDir} && cellranger count --id=cellranger --fastqs={input.inputFastqDir} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 && touch {output.step1Finished}
""")
runCellRanger.generateContent()

In [17]:
splitBam = jps.SnakeRule(snakefile, 'splitBam', 2)
splitBam.setInput(a=['step1Finished.empty'])
splitBam.setOutput(a = ['step2Finished.empty'])
splitBam.setParams(a = ['splitedDir/', 'step1ResultDir'], c= dict(step01SplitedCounts=64))
splitBam.setShell("""
python step02_splitBam.py -i {params.step1ResultDir}/cellRangerDir/cellranger/outs/possorted_genome_bam.bam -o {params.splitedDir} -t {params.step01SplitedCounts} &&\
    touch {output.step2Finished}
""")
splitBam.generateContent()

In [18]:


getOverlapInfo = jps.SnakeRule(snakefile, 'getOverlapInfo', 3, threads=64)
getOverlapInfo.setInput(a = ['step2Finished.empty'])
getOverlapInfo.setOutput(a = ['overlapInfo.tsv'])
getOverlapInfo.setParams(a = ['splitedDir/'])
getOverlapInfo.setShell("""
python step03_getOverlapInfo.py -i {params.splitedDir} -o {output.overlapInfo} -t {threads}
""")
getOverlapInfo.generateContent()

getUsefulRegion = jps.SnakeRule(snakefile, 'getUsefulRegion', 4, threads=64)
getUsefulRegion.setInput(a = ['overlapInfo.tsv'])
getUsefulRegion.setOutput(a = ['step4Finished.empty'])
getUsefulRegion.setParams(a = ['lmdbFile/'])
getUsefulRegion.setShell("""
python step04_getUsefulRegion.py -i {input.overlapInfo} -o {params.lmdbFile} -t {threads} &&\
    touch {output.step4Finished}
""")
getUsefulRegion.generateContent()

extractSeq = jps.SnakeRule(snakefile, 'extractSeq', 5, threads=64)
extractSeq.setInput(a = ['step4Finished.empty'], b = ['inputFastqDir'])
extractSeq.setOutput(a = ['step5Finished.empty'])
extractSeq.setParams(a = ['lmdbFile/', 'usefulRegionFastq/'])
extractSeq.setShell("""
python step05_extractSeq.py -i {input.inputFastqDir} -o {params.usefulRegionFastq} -l {params.lmdbFile} -t {threads} -s &&\
    touch {output.step5Finished}
""")
extractSeq.generateContent()

In [19]:
rerunCellRanger = jps.SnakeRule(snakefile, 'rerunCellRanger', 6, threads=64)
rerunCellRanger.setInput(a = ['step5Finished.empty'])
rerunCellRanger.setOutput(a = ['step6Finished.empty'])
rerunCellRanger.setParams(a = ['usefulRegionFastq/', 'rerunCellRangerDir/'], b = ['cellRangerRef'])
rerunCellRanger.setShell("""
mkdir {params.rerunCellRangerDir} && \
    cd {params.rerunCellRangerDir} && \
        cellranger count --id=cellranger --fastqs={params.usefulRegionFastq} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 && \
            touch {output.step6Finished}
""")
rerunCellRanger.generateContent()

In [20]:
snakeAll = jps.SnakeAll(snakefile)
snakeAll.generateContent(step6Finished = 0)

snakefile.generateContent(snakefilePath)

configfile: "/public/home/liuzj/scripts/pipeline/extractUsefulBaseForCellranger/snakemake/config.yaml"
pipelineDir = config['pipelineDir']


rule all:
    input:
        step6Finished = f"{config['resultDir']}step6_rerunCellRanger/step6Finished.empty"

rule runCellRanger:
    input:
        inputFastqDir = config['inputFastqDir']
    output:
        step1Finished = f"{config['resultDir']}step1_runCellRanger/step1Finished.empty"
    params:
        cellRangerRef = config['cellRangerRef'],
        cellRangerDir = f"{config['resultDir']}step1_runCellRanger/cellRangerDir/",
        gpu = "64"

    threads:1
    shell:
        """
cd {pipelineDir}
mkdir {params.cellRangerDir} && cd {params.cellRangerDir} && cellranger count --id=cellranger --fastqs={input.inputFastqDir} --transcriptome={params.cellRangerRef} --localcores={threads} --r1-length=28 && touch {output.step1Finished}
        """

rule splitBam:
    input:
        step1Finished = f"{config['resultDir']}step1_runCellRanger/step1Fini