In [1]:
%gui qt5
import napari
import pandas as pd
import numpy as np
import random
import shoji
import loompy

In [14]:
import pickle
class Test:
    """Toy class for experimenting with custom pickle state.

    The pickled state is the plain tuple ``(x, y)``.
    """

    def __init__(self, x: int):
        """Store ``x``, set ``y`` to its default of 2 and announce the call."""
        self.x, self.y = x, 2
        print("init")

    def __getstate__(self):
        """Return the state to pickle as an ``(x, y)`` tuple."""
        state = (self.x, self.y)
        return state

    def __setstate__(self, state):
        """Restore ``x`` and ``y`` from the tuple produced by ``__getstate__``."""
        restored_x, restored_y = state
        self.x = restored_x
        self.y = restored_y
# Round-trip an instance through pickle. y is changed from its __init__ default
# first, so the displayed tuple shows that the modified state is what survives.
t = Test(3)
t.y = 4
t2 = pickle.loads(pickle.dumps(t))
t2.x, t2.y

init


(3, 4)

## Connect to the shoji database

In [3]:
# Connect to shoji with default settings and make sure the top-level "eel"
# workspace exists; it is only created when it is not already in the database.
db = shoji.connect()
if "eel" not in db:
    db.eel = shoji.Workspace()

In [33]:
def make_eel_workspace(name):
    """
    Return the workspace ``db.eel[name]``, creating and initialising it if absent.

    A newly created workspace gets the dimensions ``genes``, ``dots``, ``xy`` and
    ``bits`` plus the tensors ``PixelMicrons``, ``PixelHoodSize``,
    ``StitchedCoords``, ``Gene`` and ``GeneColor``. A workspace that already
    exists is returned untouched.

    Args:
        name: Key of the workspace inside ``db.eel``.

    Returns:
        The workspace stored at ``db.eel[name]``.

    Remarks:
        Relies on the notebook-level ``db`` connection.
    """
    # Nothing to set up when the workspace is already there
    if name in db.eel:
        return db.eel[name]

    # Create a workspace for the dataset
    db.eel[name] = shoji.Workspace()
    workspace = db.eel[name]

    # Dimensions (shape=None means jagged or variable-length)
    workspace.genes = shoji.Dimension(shape=None)
    workspace.dots = shoji.Dimension(shape=None)
    workspace.xy = shoji.Dimension(shape=2)       # 2 spatial coordinates
    workspace.bits = shoji.Dimension(shape=16)    # 16 bits barcode length

    # Scalar tensors (dims=() means scalar)
    workspace.PixelMicrons = shoji.Tensor("float32", dims=(), inits=0.183333)
    workspace.PixelHoodSize = shoji.Tensor("uint16", dims=(), inits=3)

    # Per-gene tensors
    workspace.StitchedCoords = shoji.Tensor("float32", ("genes", "dots", "xy"))
    workspace.Gene = shoji.Tensor("string", ("genes", ))
    workspace.GeneColor = shoji.Tensor("uint8", ("genes", 3))  # RGB values for genes

    # Tensors that are currently disabled:
    #    workspace.RegisteredCoords =    shoji.Tensor("float32", ("genes", "dots", "xy"))
    #    workspace.OriginalCoords =      shoji.Tensor("float32", ("genes", "dots", "xy"))
    #    workspace.Shift =               shoji.Tensor("float32", ("genes", "dots", "xy"))
    #    workspace.RawBarcode =          shoji.Tensor("bool",    ("genes", "dots", "bits"))
    #    workspace.DotID =               shoji.Tensor("string",  ("genes", "dots"))
    #    workspace.HammingDistance =     shoji.Tensor("float32", ("genes", "dots"))
    #    workspace.IntensityNormalized = shoji.Tensor("float32", ("genes", "dots"))
    #    workspace.Intensity =           shoji.Tensor("float32", ("genes", "dots"))
    #    workspace.SelectedThreshold =   shoji.Tensor("float32", ("genes", "dots"))
    #    workspace.FOV =                 shoji.Tensor("uint16",  ("genes", "dots"))
    #    workspace.Round =               shoji.Tensor("uint16",  ("genes", "dots"))
    return db.eel[name]

In [34]:
def load_eel_data(wsname, parquet_file):
    """
    Load stitched dot coordinates from a parquet file into the workspace ``wsname``.

    The table is grouped by its ``gene`` column. Each group is appended as one
    row along the ``genes`` dimension, carrying the group's coordinates, the
    gene name and a randomly drawn RGB colour. After all groups are appended,
    ``genes`` is redeclared with a fixed shape equal to its current length.

    Args:
        wsname: Workspace name, passed on to ``make_eel_workspace``.
        parquet_file: Path given to ``pd.read_parquet``. The table must contain
            the columns ``gene``, ``c_stitched_coords`` and ``r_stitched_coords``.
    """
    workspace = make_eel_workspace(wsname)
    dots_table = pd.read_parquet(parquet_file)
    xy_columns = ['c_stitched_coords', 'r_stitched_coords']

    # Rows are appended one gene at a time across all tensors.
    # (Appending several, or all, rows at once would be *much* more efficient,
    # but the input is organised per gene.)
    for gene, gene_dots in dots_table.groupby('gene'):
        row = {}
        # Leading axis of length 1: each append adds a single row along "genes"
        row["StitchedCoords"] = gene_dots.loc[:, xy_columns].values[np.newaxis]
        # Note: string numpy arrays must be converted to object arrays
        row["Gene"] = np.array(gene, dtype="object")[np.newaxis]
        # Three independent draws in 0..255 make one random RGB colour
        row["GeneColor"] = np.array([[random.randint(0, 255) for _ in range(3)]], dtype="uint8")
        # Columns that are currently disabled:
        #    "RegisteredCoords":    gene_dots.loc[:,['c_px_registered','r_px_registered']].values[None],
        #    "OriginalCoords":      gene_dots.loc[:,['c_px_original','r_px_original']].values[None],
        #    "Shift":               gene_dots.loc[:,['c_shift','r_shift']].values[None],
        #    "RawBarcode":          np.array([np.array([x == 1 for x in vals[0]]) for vals in gene_dots.loc[:, ['raw_barcodes']].values])[None],  # Convert the bytes objects to bool arrays
        #    "DotID":               gene_dots.loc[:, ['dot_id']].values.T,
        #    "HammingDistance":     gene_dots.loc[:, ['hamming_distance_barcode']].values.T,
        #    "IntensityNormalized": gene_dots.loc[:, ['dot_intensity_norm']].values.T,
        #    "Intensity":           gene_dots.loc[:, ['dot_intensity_not']].values.T,
        #    "SelectedThreshold":   gene_dots.loc[:, ['selected_thr']].values.T,
        #    "FOV":                 gene_dots.loc[:, ['fov_num']].values.T,
        #    "Round":               gene_dots.loc[:, ['round_num']].values.T,
        workspace.genes.append(row)

    # Fix the genes dimension at its final length
    workspace.genes = shoji.Dimension(shape=workspace.genes.length)

## Load data into the workspace

In [7]:
# Load the first human-embryo table into workspace "human_embryo_1"
# (per the file name: the "perfect match, max 2 miss" variant).
# NOTE(review): absolute, user-specific path; it will not resolve on another machine.
load_eel_data("human_embryo_1", "/Users/stelin/Downloads/20201215-beads-registered-LBEXP20201207_EEL_HE_test2_stitched_coords_perfect_match_max_2_miss.parquet")

In [36]:
# Load the second human-embryo stitched-coordinates table into workspace "human_embryo_2".
# NOTE(review): absolute, user-specific path; it will not resolve on another machine.
load_eel_data("human_embryo_2", "/Users/stelin/Downloads/20201215-beads-registered-LBEXP20201207_EEL_HE_test2_stitched_coords.parquet")

In [49]:
# Load the hGBM stitched-coordinates table into workspace "gbm_1".
# NOTE(review): absolute, user-specific path; it will not resolve on another machine.
load_eel_data("gbm_1", "/Users/stelin/Downloads/20201201-beads-registered-JJEXP20201123_hGBM_Amine_test_stitched_coords.parquet")

## Show the workspace

In [50]:
# Bind the "gbm_1" workspace; the bare name on the last line displays its
# dimensions and tensors.
ws = db.eel.gbm_1
ws

Unnamed: 0,shape,length
bits,16.0,0
dots,,0
genes,448.0,448
xy,2.0,0

Unnamed: 0,dtype,rank,dims,shape,(values)
Gene,string,1,genes,448,"[""ACSL1"", ""ACTA2"", ""ADARB2"", ""ADGRV1"", ""AHSP"", ...]"
GeneColor,uint8,2,genes ✕ 3,448 ✕ 3,"[[86, 254, 237], [165, 153, 212], [40, 187, 166], [24, 1 ···"
PixelHoodSize,uint16,0,(),(),3
PixelMicrons,float32,0,(),(),0.18333299458026886
StitchedCoords,float32,3,genes ✕ dots ✕ xy,448 ✕ __ ✕ 2,"[[[-44384.55702555595, 11641.585482128894], [-44481.5570 ···"


## Use napari to browse the data

In [54]:
# Read the whole Gene tensor; it displays as an array of gene names
ws.Gene[...]

array(['ACSL1', 'ACTA2', 'ADARB2', 'ADGRV1', 'AHSP', 'AIF1', 'AKAP6',
       'ALAS2', 'ANKS1B', 'APOC1', 'APOE', 'AQP4', 'ARX', 'ASCL1',
       'ASTN2', 'B2M', 'BARHL1', 'BCAN', 'BCAS1', 'BCL11B', 'BHLHE22',
       'C1QA', 'C1QB', 'C1QC', 'C1QL4', 'C1orf61', 'C5orf58', 'CA8',
       'CACNA2D1', 'CACNA2D3', 'CADM2', 'CALM1', 'CAPS', 'CBLB', 'CCL18',
       'CCL20', 'CCL3', 'CCL3L1', 'CCL4', 'CCL4L2', 'CCL5', 'CCND1',
       'CCNO', 'CCSER1', 'CD163', 'CD2', 'CD36', 'CD3D', 'CD4', 'CD69',
       'CD74', 'CD79B', 'CD84', 'CD8A', 'CD93', 'CD99', 'CDH12', 'CDH8',
       'CDK1', 'CDKN1C', 'CELF4', 'CENPF', 'CHAT', 'CKS2', 'CLDN5', 'CLU',
       'CNTN1', 'CNTN4', 'CNTN5', 'CNTNAP2', 'CNTNAP5', 'COL1A1',
       'COL1A2', 'COL3A1', 'COL4A1', 'CRABP1', 'CREB5', 'CRYAB', 'CSF2RA',
       'CSMD1', 'CST3', 'CTGF', 'CTSB', 'CXCL8', 'CYBB', 'Control1',
       'Control2', 'Control3', 'Control4', 'Control5', 'Control6',
       'Control7', 'DAB1', 'DAB2', 'DBH', 'DCC', 'DCN', 'DLEU2', 'DLK1',
       'DL

In [55]:
# Re-bind the gbm_1 workspace and open a napari viewer for it.
# NOTE(review): 'gene' is the first positional argument, presumably the window
# title; confirm against the napari version in use.
ws = db.eel.gbm_1
vw = napari.Viewer('gene')


In [56]:
# Add one hidden points layer per gene to the napari viewer, coloured with the
# gene's stored RGB value.
genes = ws.Gene[...]
coords = ws.StitchedCoords[:]
colors = ws.GeneColor[...]

# Column 1 is mirrored before display. The mirror axis must be the SAME for
# every gene: flipping each gene about its own maximum would shift the layers
# relative to each other, so overlaid genes would no longer line up.
# Genes without dots are skipped here (np.max fails on an empty array).
y_max = max((np.max(xy[:, 1]) for xy in coords if len(xy) > 0), default=0)

for i, gene in enumerate(genes):
    # Work on a copy so that `coords` keeps the values as stored in the workspace
    xy = np.array(coords[i], copy=True)
    xy[:, 1] = y_max - xy[:, 1]
    # '#RRGGBB' hex string built from the gene's three uint8 colour components
    col = '#%02X%02X%02X' % tuple(colors[i])
    _ = vw.add_points(xy, name=gene, size=10, symbol='o', visible=False, edge_color=col, face_color=col)

In [60]:
# Ask for the mask of a plain (non-masked) ndarray; the result displays as False
np.ma.getmask(np.arange(12).reshape(3,4))

False

In [69]:
# 3x4 masked array in which every element is masked except the one at
# row 2, column 1 (value 9).
x = np.ma.array(np.arange(12).reshape(3,4), mask=[[True, True, True, True], [True, True, True, True], [True, False, True, True]])
x

masked_array(
  data=[[--, --, --, --],
        [--, --, --, --],
        [--, 9, --, --]],
  mask=[[ True,  True,  True,  True],
        [ True,  True,  True,  True],
        [ True, False,  True,  True]],
  fill_value=999999)

In [71]:
# Assign 91 at the masked positions, then display the underlying data
# (the unmasked 9 is left as it was).
x[x.mask] = 91
x.data

array([[91, 91, 91, 91],
       [91, 91, 91, 91],
       [91,  9, 91, 91]])

In [93]:
# Element indices requested along each of three axes
indices = [
    np.array([1, 34, 56, 79]),
    np.array([10, 30, 93]),
    np.array([100, 300, 900, 91]),
]
# Chunk size along each corresponding axis
chunk_sizes = (10, 40, 10)
# Floor division maps an index to the chunk containing it;
# np.unique keeps each chunk once, in ascending order
addresses = [list(np.unique(np.floor_divide(ind, sz))) for ind, sz in zip(indices, chunk_sizes)]

In [94]:
# Cartesian product of the per-axis chunk addresses:
# one row per combination, one column per axis.
np.array(np.meshgrid(*addresses)).T.reshape(-1, len(indices))

array([[ 0,  0,  9],
       [ 0,  2,  9],
       [ 3,  0,  9],
       [ 3,  2,  9],
       [ 5,  0,  9],
       [ 5,  2,  9],
       [ 7,  0,  9],
       [ 7,  2,  9],
       [ 0,  0, 10],
       [ 0,  2, 10],
       [ 3,  0, 10],
       [ 3,  2, 10],
       [ 5,  0, 10],
       [ 5,  2, 10],
       [ 7,  0, 10],
       [ 7,  2, 10],
       [ 0,  0, 30],
       [ 0,  2, 30],
       [ 3,  0, 30],
       [ 3,  2, 30],
       [ 5,  0, 30],
       [ 5,  2, 30],
       [ 7,  0, 30],
       [ 7,  2, 30],
       [ 0,  0, 90],
       [ 0,  2, 90],
       [ 3,  0, 90],
       [ 3,  2, 90],
       [ 5,  0, 90],
       [ 5,  2, 90],
       [ 7,  0, 90],
       [ 7,  2, 90]])

In [106]:
# Demonstration: a single element of an integer array cannot receive a 2x2
# array. The ValueError is the point of this cell, so it is caught and printed
# instead of being left uncaught, which would abort "Restart & Run All".
x = np.arange(12).reshape(3,4)
try:
    x[0,0] = np.array([[7,7], [7,7]])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: setting an array element with a sequence.

In [80]:
from itertools import combinations

In [118]:
# Cut a sub-block out of a 3-D array given a per-axis start offset and size.
offsets = (0, 10, 20)
sizes = (10, 12, 14)
x = np.arange(1000000).reshape(100, 100,100)
# One slice per axis, covering [offset, offset + size)
window = tuple(slice(a, a + b) for a, b in zip(offsets, sizes))
# Display only the shape of the selection (it equals `sizes`) rather than
# dumping all of its elements.
x[window].shape

(10, 12, 14)

In [119]:
# arange with a float stop: the displayed result is a float array running
# from 0 up to 9,999,999.
np.arange(1e7)

array([0.000000e+00, 1.000000e+00, 2.000000e+00, ..., 9.999997e+06,
       9.999998e+06, 9.999999e+06])

In [127]:
# Give a masked arange an explicit mask with one False entry per element,
# then display the mask (a full boolean array).
x = np.ma.arange(12)
x.mask = [False] * 12
x.mask

array([False, False, False, False, False, False, False, False, False,
       False, False, False])