In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import fdb
import loompy
import shoji
from tqdm import trange
%load_ext line_profiler

In [2]:
db = shoji.connect()

In [4]:
if "scRNA" in db:
    del db.scRNA

In [5]:
db.scRNA = shoji.Workspace()
db.scRNA.cells = shoji.Dimension(shape=None)
db.scRNA.genes = shoji.Dimension(shape=5000)
db.scRNA.Spliced = shoji.Tensor("int16", ("cells", "genes"), np.random.randint(0, 10, size=(1000, 5000), dtype="int16"))
db.scRNA.Age = shoji.Tensor("uint16", ("cells",), np.random.randint(0, 50, size=1000, dtype="uint16"))
db.scRNA.Length = shoji.Tensor("uint16", ("genes",), np.random.randint(0, 50, size=5000, dtype="uint16"))
db.scRNA.Chromosome = shoji.Tensor("string", ("genes",), np.full(5000, "x", dtype="object"))

In [6]:
%%time
db.scRNA.cells.append({
    "Spliced": np.random.randint(0, 10, size=(2000, 5000), dtype="int16"),
    "Age": np.random.randint(0, 50, size=2000, dtype="uint16")
})

CPU times: user 540 ms, sys: 62.4 ms, total: 602 ms
Wall time: 973 ms


In [7]:
%%time
db.scRNA[:].Spliced.shape

CPU times: user 315 ms, sys: 41 ms, total: 356 ms
Wall time: 667 ms


(3000, 5000)

In [8]:
v = db.scRNA[db.scRNA.cells[:1000] - (db.scRNA.Age > 10), db.scRNA.Length > 40]
v.Spliced.shape
v.Age.shape

(200,)

In [18]:
rna = db.scRNA
rna[rna.cells[20:0:-2]].Age = np.arange(10, dtype="uint16")

In [19]:
db.scRNA[:].Age[:20]

array([ 0,  1,  0,  3,  1,  5,  2,  7,  3,  9,  4, 17,  5, 13,  6, 31,  7,
       24,  8, 27], dtype=uint16)

In [9]:
%%time
with loompy.new("/Users/stelin/test.loom") as ds:
    ds.add_columns(np.random.randint(0, 10, size=(2000, 5000), dtype="int16"), {
        "Age": np.random.randint(0, 50, size=5000, dtype="uint16")
    },
                  row_attrs={"GeneID": np.arange(2000)})

CPU times: user 371 ms, sys: 16 ms, total: 387 ms
Wall time: 387 ms


In [10]:
%%time
with loompy.connect("/Users/stelin/test.loom") as ds:
    x = ds[:,:]
    print(x.shape)

(2000, 5000)
CPU times: user 118 ms, sys: 6.03 ms, total: 124 ms
Wall time: 123 ms


In [11]:
db.mouse = shoji.Workspace()

In [19]:
if "Development" in db.scRNA.mouse:
    del db.mouse.Development
db.mouse.Development = shoji.Workspace()
ws = db.mouse.Development
with loompy.connect("/Users/stelin/cytograph/emelie_20191031/data/Forebrain.loom", validate=False) as ds:
    ws.genes = shoji.Dimension(size=None)
    ws.cells = shoji.Dimension(size=None)
    ras = ['Accession', 'Chromosome', 'End', 'Gene', 'Start', 'Strand']
    cas = ['Age', 'CellCycle', 'CellID', 'Clusters', 'ClustersModularity', 'ClustersSurprise','DoubletFinderFlag', 'DoubletFinderScore',  'MT_ratio','NGenes','Tissue', 'TotalUMI']
    for ra in ras:
        dtype = ds.ra[ra].dtype.name
        ws[ra[0].upper() + ra[1:]] = shoji.Tensor("string" if dtype == "object" else dtype, dims=("genes",))

    STEP = 2000
    for i in trange(0, ds.shape[0], STEP):
        d = {}
        for ra in ras:
            d[ra[0].upper() + ra[1:]] = ds.ra[ra][i:i + STEP]
        db.scRNA.Forebrain.genes.append(d)

    for ca in cas:
        dtype = ds.ca[ca].dtype.name
        ws[ca[0].upper() + ca[1:]] = shoji.Tensor("string" if dtype == "object" else dtype, dims=("cells",))
    ws.Matrix = shoji.Tensor("uint16", ("cells", "genes"))
    
    STEP = 200
    for i in trange(0, ds.shape[1], STEP):
        d = {}
        for ca in cas:
            d[ca[0].upper() + ca[1:]] = ds.ca[ca][i:i + STEP]
        d["Matrix"] = ds[:, i:i + STEP].astype("uint16").T
        db.scRNA.Forebrain.cells.append(d)

  0%|          | 0/17 [00:00<?, ?it/s]

Accession
Chromosome
End
Gene
Start
Strand


100%|██████████| 17/17 [00:21<00:00,  1.28s/it]
100%|██████████| 1048/1048 [1:54:29<00:00,  6.55s/it]     


In [20]:
db.scRNA.Forebrain

Workspace with 0 subspaces, 2 dimensions and 19 tensors:
  cells <Dimension of variable size>
  genes <Dimension of variable size>
  Accession <Tensor 'string' ('genes',), 33538 rows>
  Age <Tensor 'string' ('cells',), 209596 rows>
  CellCycle <Tensor 'float64' ('cells',), 209596 rows>
  CellID <Tensor 'string' ('cells',), 209596 rows>
  Chromosome <Tensor 'string' ('genes',), 33538 rows>
  Clusters <Tensor 'int64' ('cells',), 209596 rows>
  ClustersModularity <Tensor 'int64' ('cells',), 209596 rows>
  ClustersSurprise <Tensor 'int64' ('cells',), 209596 rows>
  DoubletFinderFlag <Tensor 'int64' ('cells',), 209596 rows>
  DoubletFinderScore <Tensor 'float64' ('cells',), 209596 rows>
  End <Tensor 'int64' ('genes',), 33538 rows>
  Gene <Tensor 'string' ('genes',), 33538 rows>
  MT_ratio <Tensor 'float64' ('cells',), 209596 rows>
  Matrix <Tensor 'uint16' ('cells', 'genes'), 209596 rows>
  NGenes <Tensor 'float64' ('cells',), 209596 rows>
  Start <Tensor 'int64' ('genes',), 33538 rows>
  

In [9]:
db.scRNA.Forebrain[db.scRNA.Forebrain.NGenes > 7000].Matrix.shape

(2952, 33538)