Skip to content

Commit

Permalink
finished star and cellranger
Browse files Browse the repository at this point in the history
  • Loading branch information
kafkasl committed Nov 10, 2022
1 parent c4673d9 commit 1415f2e
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 10 deletions.
20 changes: 17 additions & 3 deletions bin/cellranger_mtx_to_h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import os
import pandas as pd
import scipy
from scipy import io
from anndata import AnnData

def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False):
Expand All @@ -20,12 +20,24 @@ def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False):

def write_counts(
    adata: AnnData,
    txp2gene: str,
    out: str,
    verbose: bool = True,
):
    """Export an AnnData object as features.tsv, barcodes.tsv and matrix.mtx.

    Args:
        adata: Annotated count matrix. ``adata.var.index`` supplies the feature
            ids, ``adata.obs.index`` the barcodes and ``adata.X`` the counts.
        txp2gene: Directory that contains ``star/geneInfo.tab``. When empty or
            ``None`` the features file holds the feature ids only.
        out: Directory the three files are written to (created if missing).
        verbose: Print a confirmation message once the files are written.
    """
    # Callers are not required to have created the output directory beforehand.
    os.makedirs(out, exist_ok=True)

    features = pd.DataFrame()
    features["id"] = adata.var.index

    # If a reference directory is available, enrich the features file with gene names.
    if txp2gene:
        # The first line of geneInfo.tab is skipped (presumably a record count --
        # confirm against the STAR docs); column 0 is used as id, column 1 as name.
        t2g = pd.read_table(f"{txp2gene}/star/geneInfo.tab", header=None, skiprows=1)
        id2name = dict(zip(t2g[0], t2g[1]))
        features["name"] = adata.var.index.map(id2name)

    features.to_csv(os.path.join(out, "features.tsv"), sep="\t", index=False, header=False)
    pd.DataFrame(adata.obs.index).to_csv(
        os.path.join(out, "barcodes.tsv"), sep="\t", index=False, header=False
    )
    # The matrix is written transposed relative to adata.X (features as rows).
    io.mmwrite(os.path.join(out, "matrix.mtx"), adata.X.T, field="integer")

    if verbose:
        # Report the directory actually written to; the function must not depend
        # on the script-level ``args`` dict being defined.
        print("Wrote features.tsv, barcodes.tsv, and matrix.mtx files to {}".format(out))
Expand All @@ -40,6 +52,7 @@ def write_counts(
parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False)
parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
parser.add_argument("-o", "--out", dest="out", help="Output path.")
parser.add_argument("--txp2gene", dest="txp2gene", help="Transcript to gene (t2g) file.", nargs='?', const='')

args = vars(parser.parse_args())

Expand All @@ -54,6 +67,7 @@ def write_counts(

write_counts(
adata,
args["txp2gene"],
args["sample"],
verbose=args["verbose"]
)
Expand Down
13 changes: 11 additions & 2 deletions bin/mtx_to_h5ad.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
import argparse
import os
import scipy
from scipy import io
from anndata import AnnData

def mtx_to_adata(
Expand Down Expand Up @@ -33,6 +33,7 @@ def mtx_to_adata(
def write_counts(
adata: AnnData,
txp2gene: str,
star_index: str,
out: str,
verbose: bool = True,):

Expand All @@ -48,9 +49,15 @@ def write_counts(
id2name = {e[1]: e[2] for _, e in t2g.iterrows()}
features["name"] = adata.var.index.map(id2name)

# if star_index file is available enrich features file with gene names
if star_index:
t2g = pd.read_table(f"{star_index}/geneInfo.tab", header=None, skiprows=1)
id2name = {e[0]: e[1] for _, e in t2g.iterrows()}
features["name"] = adata.var.index.map(id2name)

features.to_csv(os.path.join(out, "features.tsv"), sep="\t", index=False, header=None)
pd.DataFrame(adata.obs.index).to_csv(os.path.join(out, "barcodes.tsv"), sep="\t", index=False, header=None)
scipy.io.mmwrite(os.path.join(out, "matrix.mtx"), adata.X.T, field="integer")
io.mmwrite(os.path.join(out, "matrix.mtx"), adata.X.T, field="integer")

if verbose:
print("Wrote features.tsv, barcodes.tsv, and matrix.mtx files to {}".format(args["out"]))
Expand All @@ -68,6 +75,7 @@ def write_counts(
parser.add_argument("-o", "--out", dest="out", help="Output path.")
parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?")
parser.add_argument("--txp2gene", dest="txp2gene", help="Transcript to gene (t2g) file.", nargs='?', const='')
parser.add_argument("--star_index", dest="star_index", help="Star index folder containing geneInfo.tab.", nargs='?', const='')

args = vars(parser.parse_args())

Expand All @@ -90,6 +98,7 @@ def write_counts(
write_counts(
adata,
args["txp2gene"],
args["star_index"],
args["sample"],
verbose=args["verbose"]
)
Expand Down
4 changes: 4 additions & 0 deletions modules/local/mtx_to_h5ad.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ process MTX_TO_H5AD {
// for each sample, the sub-folders and files come directly in array.
tuple val(meta), path(inputs)
path txp2gene
path star_index

output:
path "${meta.id}/*h5ad", emit: h5ad
Expand Down Expand Up @@ -45,6 +46,7 @@ process MTX_TO_H5AD {
cellranger_mtx_to_h5ad.py \\
--mtx filtered_feature_bc_matrix.h5 \\
--sample ${meta.id} \\
--txp2gene ${txp2gene} \\
--out ${meta.id}/${meta.id}_matrix.h5ad
"""

Expand All @@ -59,6 +61,7 @@ process MTX_TO_H5AD {
--barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\
--feature *count/counts_unfiltered/\${input_type}.genes.txt \\
--txp2gene ${txp2gene} \\
--star_index ${star_index} \\
--out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ;
done
"""
Expand All @@ -73,6 +76,7 @@ process MTX_TO_H5AD {
--barcode $barcodes_tsv \\
--feature $features_tsv \\
--txp2gene ${txp2gene} \\
--star_index ${star_index} \\
--out ${meta.id}/${meta.id}_matrix.h5ad
"""

Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/align_cellranger.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
include {CELLRANGER_MKGTF} from "../../modules/nf-core/cellranger/mkgtf/main.nf"
include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf"
include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf"
include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf"

// Define workflow to subset and index a genome region fasta file
workflow CELLRANGER_ALIGN {
Expand Down Expand Up @@ -43,4 +42,5 @@ workflow CELLRANGER_ALIGN {
emit:
ch_versions
cellranger_out = CELLRANGER_COUNT.out.outs
txp2gene = cellranger_index
}
4 changes: 3 additions & 1 deletion subworkflows/local/mtx_conversion.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ workflow MTX_CONVERSION {
mtx_matrices
samplesheet
txp2gene
star_index

main:
ch_versions = Channel.empty()
Expand All @@ -18,7 +19,8 @@ workflow MTX_CONVERSION {
//
MTX_TO_H5AD (
mtx_matrices,
txp2gene
txp2gene,
star_index
)

//
Expand Down
1 change: 1 addition & 0 deletions subworkflows/local/starsolo.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ workflow STARSOLO {

emit:
ch_versions
star_index = star_index
star_result = STAR_ALIGN.out.tab
star_counts = STAR_ALIGN.out.counts
for_multiqc = STAR_ALIGN.out.log_final.collect{it[1]}.ifEmpty([])
Expand Down
7 changes: 4 additions & 3 deletions workflows/scrnaseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ workflow SCRNASEQ {
)
ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts)
ch_star_index = STARSOLO.out.star_index
ch_multiqc_star = STARSOLO.out.for_multiqc
}

Expand All @@ -181,15 +182,15 @@ workflow SCRNASEQ {
)
ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out)
ch_txp2gene = CELLRANGER_ALIGN.out.txp2gene
}

// Run mtx to h5ad conversion subworkflow
MTX_CONVERSION (
ch_mtx_matrices,
ch_input,
ch_txp2gene
// ch_genome_fasta,
// ch_filter_gtf
ch_txp2gene,
ch_star_index
)

//Add Versions from MTX Conversion workflow too
Expand Down

0 comments on commit 1415f2e

Please sign in to comment.