In [76]:
import jpy_tools.parseSnake as jps

In [77]:
# Path handed to snakefile.generateContent() in the final cell of this notebook.
SNAKE_PATH = "/public/home/liuzj/scripts/pipeline/sicelore/snakemake/snakefile"

In [78]:
# Shared builder object: every header/rule/all section created below receives
# it as its first argument, and it is rendered via generateContent(SNAKE_PATH)
# in the last cell.
snakefile = jps.SnakeMakeFile()

In [79]:
# Header section built from the pipeline's YAML config. Running
# generateContent() prints the "config contents" listing shown under this cell.
header = jps.SnakeHeader(
    snakefile,
    "/public/home/liuzj/scripts/pipeline/sicelore/snakemake/config.yaml",
)
header.generateContent()

config contents:

    pipelineDir     :/public/home/liuzj/scripts/pipeline/sicelore/scripts/

     resultDir      :/public/home/liuzj/projects/split_barcode/01_20200507/10_sicelore/siceloreResult_20200611/

    barcodeFile     :/public/home/liuzj/projects/split_barcode/01_20200507/10_sicelore/00_data/barcodes.tsv

 cellrangerBamFile  :/public/home/liuzj/projects/split_barcode/00_before0507/02_results/TAIR10_5000_5000_5000/outs/possorted_genome_bam.bam

      geneBed       :/public/home/liuzj/data/Araport11/gene.bed

    genomeFasta     :/public/home/liuzj/data/Araport11/genome.fa

    nanoporeRead    :/public/home/liuzj/projects/split_barcode/01_20200507/00_rawdata/01_basecalledData/00_basecalledData/all/all.fastq

      refFlat       :/public/home/liuzj/data/Araport11/gene.refFlat



In [80]:
# Step 1 - parseIlluminaBam: run IlluminaParser on the Cell Ranger BAM and the
# barcode list to write illuminaIndex.index.
# In the generated Snakefile printed by the last cell, names in group `b`
# become config[...] lookups and names in group `a` become paths under
# {resultDir}step<N>_<rule>/, keyed by the file name without its extension.
parseIlluminaBam = jps.SnakeRule(snakefile, "parseIlluminaBam", 1)
parseIlluminaBam.setInput(b=["barcodeFile", "cellrangerBamFile"])
parseIlluminaBam.setOutput(a=["illuminaIndex.index"])
parseIlluminaBam.setShell("""
java -jar Jar/IlluminaParser-1.0.jar -i {input.cellrangerBamFile} -o {output.illuminaIndex} -t {input.barcodeFile} -b CB -g GN -u UB
""")
parseIlluminaBam.generateContent()

In [81]:
# Step 2 - scanNanoporeReads: copy the raw Nanopore FASTQ to the
# `allNanopore` param path, then run NanoporeReadScanner on that copy with
# the `scanNanoporeReads` param directory as its output location.
# The declared rule output is passed/allNanoporeFWD.fastq inside that directory.
scanNanoporeReads = jps.SnakeRule(snakefile, "scanNanoporeReads", 2)
scanNanoporeReads.setInput(b=["nanoporeRead"])
scanNanoporeReads.setParams(a=["scanNanoporeReads/", "allNanopore.fastq"])
scanNanoporeReads.setOutput(
    a=["scanNanoporeReads/passed/allNanoporeFWD.fastq"]
)
scanNanoporeReads.setShell("""
cp {input.nanoporeRead} {params.allNanopore} &&\
java -jar Jar/NanoporeReadScanner-0.5.jar -i {params.allNanopore} -o {params.scanNanoporeReads}
""")
scanNanoporeReads.generateContent()

In [82]:
# Step 3 - mapReadsToGenome: spliced minimap2 alignment of the scanned reads
# against the genome (junctions from geneBed), piped into samtools sort and
# followed by samtools index.
# `allNanoporeFWD` (group `a`) presumably resolves to the step-2 output of the
# same name - confirm against jpy_tools.parseSnake.
mapReadsToGenome = jps.SnakeRule(
    snakefile, "mapReadsToGenome", 3, threads=56
)
mapReadsToGenome.setInput(
    b=["geneBed", "genomeFasta"],
    a=["allNanoporeFWD"],
)
mapReadsToGenome.setOutput(a=["filtedReads.bam"])
mapReadsToGenome.setShell("""
minimap2 -ax splice -uf --MD --sam-hit-only -t {threads} --junc-bed {input.geneBed} {input.genomeFasta} {input.allNanoporeFWD} |\
samtools sort - -o {output.filtedReads} &&\
samtools index {output.filtedReads}
""")
mapReadsToGenome.generateContent()

In [83]:
# Step 4 - tagReadsWithGeneName: Sicelore AddGeneNameTag on the sorted read
# BAM using the refFlat annotation (gene tag GE), then index the tagged BAM.
tagReadsWithGeneName = jps.SnakeRule(snakefile, "tagReadsWithGeneName", 4)
tagReadsWithGeneName.setInput(
    b=["refFlat"],
    a=["filtedReads.bam"],
)
tagReadsWithGeneName.setOutput(a=["addGeneName.bam"])
tagReadsWithGeneName.setShell("""
java -jar -Xmx64g Jar/Sicelore-1.0.jar AddGeneNameTag I={input.filtedReads} O={output.addGeneName} REFFLAT={input.refFlat} GENETAG=GE ALLOW_MULTI_GENE_READS=true USE_STRAND_INFO=true VALIDATION_STRINGENCY=SILENT &&\
samtools index {output.addGeneName}
""")
tagReadsWithGeneName.generateContent()

In [84]:
# Step 5 - tagReadsWithSeq: Sicelore AddBamReadSequenceTag, fed the
# gene-tagged BAM plus the scanned FASTQ, then index the resulting BAM.
tagReadsWithSeq = jps.SnakeRule(snakefile, "tagReadsWithSeq", 5)
tagReadsWithSeq.setInput(a=["addGeneName.bam", "allNanoporeFWD"])
tagReadsWithSeq.setOutput(a=["addSeq.bam"])
tagReadsWithSeq.setShell("""
java -jar -Xmx64g Jar/Sicelore-1.0.jar AddBamReadSequenceTag I={input.addGeneName} O={output.addSeq} FASTQ={input.allNanoporeFWD} VALIDATION_STRINGENCY=SILENT &&\
samtools index {output.addSeq}
""")
tagReadsWithSeq.generateContent()

In [85]:
# Step 6 - tagBarcodeUmi: run NanoporeBC_UMI_finder on the sequence-tagged
# BAM with the Illumina index from step 1, writing a BAM and a log file.
# NOTE(review): addBarcodeUmi_umifound_.bam is declared as an output but is
# never named in the command; presumably the tool derives it from the -o
# path - confirm against the tool's documentation.
tagBarcodeUmi = jps.SnakeRule(snakefile, "tagBarcodeUmi", 6, threads=56)
tagBarcodeUmi.setInput(a=["addSeq.bam", "illuminaIndex.index"])
tagBarcodeUmi.setOutput(
    a=[
        "addBarcodeUmi.bam",
        "addBarcodeUmi_umifound_.bam",
        "addBarcodeUmiLog.log",
    ]
)
tagBarcodeUmi.setShell("""
java -jar -Xmx64g Jar/NanoporeBC_UMI_finder-1.0.jar -i {input.addSeq} -o {output.addBarcodeUmi} -k {input.illuminaIndex} --ncpu {threads} -b 3 -u 3 --logFile {output.addBarcodeUmiLog}
""")
tagBarcodeUmi.generateContent()

In [86]:
# Step 7 - computeConsensusSeq: Sicelore ComputeConsensus on the
# "_umifound_" BAM from step 6, writing allConsensus.fa and using a dedicated
# temp directory passed as TMPDIR.
computeConsensusSeq = jps.SnakeRule(snakefile, 'computeConsensusSeq', 7, threads=56)
computeConsensusSeq.setInput(a = ['addBarcodeUmi_umifound_.bam'])
computeConsensusSeq.setOutput(a = ['allConsensus.fa'])
# The param key keeps its original spelling ("Consunsus"): it also determines
# the directory name on disk and the {params.*} placeholder used below.
computeConsensusSeq.setParams(a = ['computeConsunsusTemp/'])
# `mkdir -p` instead of plain `mkdir`: the temp directory is a param, not a
# declared output, so it can survive a failed or interrupted run; plain
# `mkdir` would then abort the rule on every retry.
computeConsensusSeq.setShell("""
mkdir -p {params.computeConsunsusTemp} &&\
java -jar -Xmx80g Jar/Sicelore-1.0.jar ComputeConsensus T={threads} I={input.addBarcodeUmi_umifound_} O={output.allConsensus} TMPDIR={params.computeConsunsusTemp}
""")
computeConsensusSeq.generateContent()

In [87]:
# Step 8 - mapMolToGenome: spliced minimap2 alignment of the consensus
# sequences (no secondary alignments), piped into samtools sort and followed
# by samtools index.
mapMolToGenome = jps.SnakeRule(snakefile, "mapMolToGenome", 8, threads=56)
mapMolToGenome.setInput(
    b=["genomeFasta", "geneBed"],
    a=["allConsensus.fa"],
)
mapMolToGenome.setOutput(a=["molMappingResult.bam"])
mapMolToGenome.setShell("""
minimap2 -ax splice --secondary=no -uf --MD --sam-hit-only -t {threads} --junc-bed {input.geneBed} {input.genomeFasta} {input.allConsensus} |\
samtools sort - -o {output.molMappingResult} && \
samtools index {output.molMappingResult}
""")
mapMolToGenome.generateContent()

In [88]:
# Step 9 - tagMolBc: Sicelore AddBamMoleculeTags on the molecule-level BAM,
# then index the tagged BAM.
# NOTE(review): the 4th positional argument (1) is presumably `threads`, which
# other cells pass by keyword - confirm against SnakeRule's signature.
tagMolBc = jps.SnakeRule(snakefile, "tagMolBc", 9, 1)
tagMolBc.setInput(a=["molMappingResult.bam"])
tagMolBc.setOutput(a=["molMappingResultBc.bam"])
tagMolBc.setShell("""
java -jar -Xmx20g Jar/Sicelore-1.0.jar AddBamMoleculeTags I={input.molMappingResult} O={output.molMappingResultBc} &&\
samtools index {output.molMappingResultBc}
""")
tagMolBc.generateContent()

In [89]:
# Step 10 - tagMolGeneName: Sicelore AddGeneNameTag on the molecule-tagged
# BAM using the refFlat annotation (gene tag GE), then index the final BAM.
# NOTE(review): the 4th positional argument (1) is presumably `threads` -
# confirm against SnakeRule's signature.
tagMolGeneName = jps.SnakeRule(snakefile, "tagMolGeneName", 10, 1)
tagMolGeneName.setInput(
    a=["molMappingResultBc.bam"],
    b=["refFlat"],
)
tagMolGeneName.setOutput(a=["molMappingResultBcFinal.bam"])
tagMolGeneName.setShell("""
java -jar -Xmx20g Jar/Sicelore-1.0.jar AddGeneNameTag I={input.molMappingResultBc} O={output.molMappingResultBcFinal} REFFLAT={input.refFlat} GENETAG=GE ALLOW_MULTI_GENE_READS=true USE_STRAND_INFO=true VALIDATION_STRINGENCY=SILENT &&\
samtools index {output.molMappingResultBcFinal}
""")
tagMolGeneName.generateContent()

In [90]:
# Step 11 - generateExpressionMatrix: Sicelore IsoformMatrix on the final
# molecule BAM, writing its results into the expressionMatrixDir param
# directory. The declared rule output is only a sentinel file that is
# touched once the command chain has succeeded.
# NOTE(review): the 4th positional argument (1) is presumably `threads` -
# confirm against SnakeRule's signature.
generateExpressionMatrix = jps.SnakeRule(snakefile, 'generateExpressionMatrix', 11, 1)
generateExpressionMatrix.setInput(a = ['molMappingResultBcFinal.bam'], b = ['refFlat', 'barcodeFile'])
generateExpressionMatrix.setOutput(a = ['generateExpressionMatrixfinished.empty'])
generateExpressionMatrix.setParams(a = ['expressionMatrixDir/'])
# `mkdir -p` instead of plain `mkdir`: the matrix directory is a param, not a
# declared output, so it can survive a failed or interrupted run; plain
# `mkdir` would then abort the rule on every retry.
generateExpressionMatrix.setShell("""
mkdir -p {params.expressionMatrixDir} &&\
java -jar -Xmx20g Jar/Sicelore-1.0.jar IsoformMatrix DELTA=2 METHOD=STRICT ISOBAM=true GENETAG=GE I={input.molMappingResultBcFinal} REFFLAT={input.refFlat} CSV={input.barcodeFile} OUTDIR={params.expressionMatrixDir} PREFIX=sicmol VALIDATION_STRINGENCY=SILENT && \
touch {output.generateExpressionMatrixfinished}
""")
generateExpressionMatrix.generateContent()

In [91]:
# `rule all`: its only input is the step-11 sentinel file (see the
# "rule all" section of the generated Snakefile printed by the last cell).
# NOTE(review): the meaning of the value 0 is not visible here - confirm
# against jpy_tools.parseSnake.SnakeAll.generateContent.
snakeAll = jps.SnakeAll(snakefile)
snakeAll.generateContent(generateExpressionMatrixfinished=0)

In [92]:
# Render the accumulated header, `rule all` and step rules; the generated
# Snakefile text is echoed below this cell.
# Presumably the same text is also written to SNAKE_PATH - confirm against
# jpy_tools.parseSnake.SnakeMakeFile.generateContent.
snakefile.generateContent(SNAKE_PATH)

configfile: "/public/home/liuzj/scripts/pipeline/sicelore/snakemake/config.yaml"
pipelineDir = config['pipelineDir']


rule all:
    input:
        generateExpressionMatrixfinished = f"{config['resultDir']}step11_generateExpressionMatrix/generateExpressionMatrixfinished.empty"

rule parseIlluminaBam:
    input:
        barcodeFile = config['barcodeFile'],
        cellrangerBamFile = config['cellrangerBamFile']
    output:
        illuminaIndex = f"{config['resultDir']}step1_parseIlluminaBam/illuminaIndex.index"
    params:
        gpu = "0"

    threads:1
    shell:
        """
cd {pipelineDir}
java -jar Jar/IlluminaParser-1.0.jar -i {input.cellrangerBamFile} -o {output.illuminaIndex} -t {input.barcodeFile} -b CB -g GN -u UB
        """

rule scanNanoporeReads:
    input:
        nanoporeRead = config['nanoporeRead']
    output:
        allNanoporeFWD = f"{config['resultDir']}step2_scanNanoporeReads/scanNanoporeReads/passed/allNanoporeFWD.fastq"
    params:
        scanNanoporeReads = 