In [1]:
import pandas as pd
import glob
import re

import rpy2.robjects as ro
from rpy2.robjects import r
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

In [2]:
# Turn on rpy2's pandas conversion for the whole session.
# NOTE(review): the demultiplexing loop at the end of this notebook already
# wraps its R call in an explicit localconverter(ro.default_converter +
# pandas2ri.converter), so this global switch looks redundant for that call.
# Recent rpy2 releases are also understood to deprecate activate() -- confirm
# against the installed rpy2 version before removing it.
pandas2ri.activate()

In [3]:
# Load the rpy2 IPython extension; the %%R cell magic used by the next cell
# (which defines demultiplex_alevin on the R side) comes from it.
%load_ext rpy2.ipython

In [4]:
%%R

suppressPackageStartupMessages({
    library(fishpond)
    library(tximport)
    library(ggplot2)
    library(Seurat)
})



# Demultiplex one alevin run using its hashtag-oligo (HTO) counts.
#
# Imports the RNA and HTO alevin quantifications found under `input_dir`
# (`rna/alevin/quants_mat.gz` and `hto/alevin/quants_mat.gz`), keeps only the
# barcodes present in both matrices, and classifies them with Seurat's
# HTODemux on CLR-normalised HTO counts.
#
# Args:
#   input_dir:         directory containing the `rna/` and `hto/` alevin outputs.
#   positive.quantile: quantile forwarded to HTODemux. Defaults to 0.99, the
#                      value that used to be hard-coded.
#
# Returns:
#   A data.frame with the columns `HTO_classification` and
#   `HTO_classification.global`, one row per cell of the Seurat object.
#   The Python caller in this notebook turns the row index into a `barcode`
#   column, so the row names are expected to be the cell barcodes.
#
# Stops with an explicit error when an input file is missing or when the RNA
# and HTO matrices share no barcode.
demultiplex_alevin <- function(input_dir, positive.quantile = 0.99)
{
    rna.files <- file.path(input_dir, "rna", "alevin", "quants_mat.gz")
    hto.files <- file.path(input_dir, "hto", "alevin", "quants_mat.gz")

    # Fail early with a readable message instead of an error from deep inside tximport.
    input.files <- c(rna.files, hto.files)
    missing.files <- input.files[!file.exists(input.files)]
    if (length(missing.files) > 0) {
        stop("alevin output not found: ", paste(missing.files, collapse = ", "))
    }

    rna.txi <- tximport(files = rna.files, type = "alevin")
    hto.txi <- tximport(files = hto.files, type = "alevin")

    # Only barcodes quantified in both libraries can be demultiplexed.
    common.cells <- intersect(colnames(rna.txi$counts), colnames(hto.txi$counts))
    if (length(common.cells) == 0) {
        stop("no cell barcodes shared between RNA and HTO matrices in: ", input_dir)
    }

    # drop = FALSE keeps a matrix even if a single barcode is shared.
    object <- CreateSeuratObject(rna.txi$counts[, common.cells, drop = FALSE])
    object[["HTO"]] <- CreateAssayObject(counts = hto.txi$counts[, common.cells, drop = FALSE])

    # The normalisation / variable-feature calls below act on the default assay.
    DefaultAssay(object) <- "HTO"
    object <- NormalizeData(object, normalization.method = "CLR", margin = 2, verbose = FALSE)
    # Every HTO is treated as a variable feature; rownames() on the assay avoids
    # reaching into the `@counts` slot directly.
    VariableFeatures(object) <- rownames(object[["HTO"]])
    object <- ScaleData(object, assay = "HTO", verbose = FALSE)

    object <- HTODemux(object, assay = "HTO", positive.quantile = positive.quantile, verbose = FALSE)

    # NOTE(review): the Python side maps '-' back to '_' in HTO names, presumably
    # because Seurat rewrites underscores in feature names -- confirm.
    data.frame(
        HTO_classification = object$HTO_classification,
        HTO_classification.global = object$HTO_classification.global
    )
}



In [5]:
def assay_id_iter():
    """Yield the assay id of every run that has HTO alevin output.

    Globs for ``../../_m/*/hto/alevin/quants_mat_cols.txt`` relative to the
    current working directory and yields the path component that directly
    follows ``_m/`` for each hit. Paths the pattern does not match are skipped.
    Results come in whatever order ``glob.glob`` returns them.
    """
    id_pattern = re.compile('_m/([^/]+)/')
    hits = map(id_pattern.search, glob.glob('../../_m/*/hto/alevin/quants_mat_cols.txt'))
    yield from (hit.group(1) for hit in hits if hit is not None)

In [None]:
def underscore_map_iter(fn):
    """Yield ``(dashed, original)`` pairs for each line of the file ``fn``.

    Each line is stripped of trailing whitespace; ``original`` is the stripped
    text and ``dashed`` is the same text with every ``_`` replaced by ``-``.
    Feeding the pairs to ``dict`` gives a dashed-name -> original-name lookup.
    """
    with open(fn) as handle:
        yield from (
            (name.replace('_', '-'), name)
            for name in map(str.rstrip, handle)
        )

In [None]:
# For every assay with HTO output, run the R-side demultiplexing and write two
# tab-separated files into the current directory:
#   <assay_id>.raw.tsv      -- classification of every barcode
#   <assay_id>.singlets.tsv -- singlets only, with HTO names mapped back to the
#                              spelling used in quants_mat_cols.txt
for assay_id in assay_id_iter():
    # Only the R call and its conversion need the pandas converter in effect.
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_result = r['demultiplex_alevin']("../../_m/%s" % assay_id)
        demux_df = ro.conversion.rpy2py(r_result)

    # Expose the row index (expected to hold the barcodes) as a regular column.
    demux_df = demux_df.reset_index()
    demux_df = demux_df.rename(columns={'index':'barcode'})
    demux_df.to_csv('%s.raw.tsv' % assay_id, sep="\t", index=None)

    is_singlet = demux_df['HTO_classification.global']=='Singlet'
    singlet_df = demux_df[is_singlet].copy()

    # dashed name -> original name; an unknown name raises KeyError.
    ud = dict(underscore_map_iter('../../_m/%s/hto/alevin/quants_mat_cols.txt' % assay_id))
    singlet_df['HTO_classification'] = singlet_df['HTO_classification'].apply(ud.__getitem__)
    singlet_df.to_csv('%s.singlets.tsv' % assay_id, sep="\t", index=None)
