In [1]:
%gui qt5
import napari
import pandas as pd
import numpy as np
import random
import shoji
import loompy

## Connect to the shoji database

In [2]:
# Open the shoji database; `db` is the handle the later cells use to create and read workspaces.
db = shoji.connect()

## Create a workspace to hold the EEL data

In [3]:
# Make sure the parent "eel" workspace exists before touching the dataset below it.
if "eel" not in db:
    db.eel = shoji.Workspace()

# The schema is only declared the first time, when the dataset workspace is missing.
if "mouse_coronal" not in db.eel:
    db.eel.mouse_coronal = shoji.Workspace()
    ws = db.eel.mouse_coronal

    # Dimensions must exist before the tensors that refer to them by name.
    dimension_shapes = (
        ("genes", None),  # None means jagged or variable-length
        ("dots", None),   # None means jagged
        ("xy", 2),        # 2 spatial coordinates
        ("bits", 16),     # 16 bits barcode length
    )
    for dim_name, dim_shape in dimension_shapes:
        setattr(ws, dim_name, shoji.Dimension(shape=dim_shape))

    # Scalar tensors: dims=() means scalar; the value is supplied through `inits`.
    ws.PixelMicrons = shoji.Tensor("float32", dims=(), inits=0.183333)
    ws.PixelHoodSize = shoji.Tensor("uint16", dims=(), inits=3)

    # Remaining tensors as (name, dtype, dims), listed in creation order.
    per_dot_xy = ("genes", "dots", "xy")
    per_dot = ("genes", "dots")
    tensor_specs = (
        ("StitchedCoords", "float32", per_dot_xy),
        ("RegisteredCoords", "float32", per_dot_xy),
        ("OriginalCoords", "float32", per_dot_xy),
        ("Shift", "float32", per_dot_xy),
        ("RawBarcode", "bool", ("genes", "dots", "bits")),
        ("Gene", "string", ("genes", )),
        ("GeneColor", "uint8", ("genes", 3)),  # RGB values for genes
        ("DotID", "string", per_dot),
        ("HammingDistance", "float32", per_dot),
        ("IntensityNormalized", "float32", per_dot),
        ("Intensity", "float32", per_dot),
        ("SelectedThreshold", "float32", per_dot),
        ("FOV", "uint16", per_dot),
        ("Round", "uint16", per_dot),
    )
    for tensor_name, tensor_dtype, tensor_dims in tensor_specs:
        setattr(ws, tensor_name, shoji.Tensor(tensor_dtype, tensor_dims))

# Bind `ws` whether the workspace was just created or already existed.
ws = db.eel.mouse_coronal

## Load data into the workspace

In [4]:
def r():
    """Return a random integer between 0 and 255 inclusive (used as one colour channel)."""
    return random.randint(0,255)


def _columns_with_leading_axis(frame, columns):
    """Return the listed columns of `frame` as an array with a new leading axis of length 1."""
    return frame.loc[:, columns].values[None]


def _column_as_single_row(frame, column):
    """Return one column of `frame` as a 2-D array with a single row."""
    return frame.loc[:, [column]].values.T


def _barcodes_as_bool(frame):
    """Convert each entry of the 'raw_barcodes' column into a bool array (True where the element equals 1)."""
    per_dot = [np.array([bit == 1 for bit in barcode]) for barcode in frame.loc[:, 'raw_barcodes'].values]
    return np.array(per_dot)[None]


stitched_coords = pd.read_parquet('/Users/stelin/lars_viz_counts/20200814_stitched_coords_perfect_match_max_2_miss.parquet')
gene_grp = stitched_coords.groupby('gene')

# Tensor name -> source column, for the tensors that hold one value per dot
per_dot_columns = {
    "DotID":               'dot_id',
    "HammingDistance":     'hamming_distance_barcode',
    "IntensityNormalized": 'dot_intensity_norm',
    "Intensity":           'dot_intensity_not',
    "SelectedThreshold":   'selected_thr',
    "FOV":                 'fov_num',
    "Round":               'round_num',
}

# Add rows, one at a time, across all tensors
# (it would be *much* more efficient to append multiple, or all, rows at a time, but our input is organized this way)
for gene, coords in gene_grp:
    row = {
        "StitchedCoords":   _columns_with_leading_axis(coords, ['c_stitched_coords','r_stitched_coords']),
        "RegisteredCoords": _columns_with_leading_axis(coords, ['c_px_registered','r_px_registered']),
        "OriginalCoords":   _columns_with_leading_axis(coords, ['c_px_original','r_px_original']),
        "Shift":            _columns_with_leading_axis(coords, ['c_shift','r_shift']),
        "RawBarcode":       _barcodes_as_bool(coords),
        # Note: must convert string numpy arrays to object arrays
        "Gene":             np.array(gene, dtype="object")[None],
        # One random RGB triple per gene
        "GeneColor":        np.array([[r() for _ in range(3)]], dtype="uint8"),
    }
    for tensor_name, column in per_dot_columns.items():
        row[tensor_name] = _column_as_single_row(coords, column)
    ws.genes.append(row)

# Fix the genes dimension
# NOTE(review): the output saved with this cell is
# "TypeError: object of type 'Dimension' has no len()"; which statement raised it is
# not visible here - re-run against the installed shoji version to confirm.
ws.genes = shoji.Dimension(shape=ws.genes.length)

TypeError: object of type 'Dimension' has no len()

## Show the workspace

In [9]:
# A bare expression on the last line of a cell is displayed by the notebook;
# here that is the workspace's summary of its dimensions and tensors.
db.eel.mouse_coronal

Unnamed: 0,shape,length
bits,16.0,0
dots,,0
genes,168.0,168
xy,2.0,0

Unnamed: 0,dtype,rank,dims,shape,(values)
DotID,string,2,genes ✕ dots,168 ✕ __,"[[""1_1_1041"", ""1_1_2471"", ""1_1_3233"", ""1_1_3964"", ""1_1_5 ···"
FOV,uint16,2,genes ✕ dots,168 ✕ __,"[[1, 1, 1, 1, 1, ...], [1, 1, 1, 1, 1, ...], [1, 1, 2, 2 ···"
Gene,string,1,genes,168,"[""Abi3bp"", ""Acta2"", ""Adora2a"", ""Afp"", ""Agt"", ...]"
GeneColor,uint8,2,genes ✕ 3,168 ✕ 3,"[[79, 109, 88], [43, 179, 208], [95, 108, 3], [99, 53, 2 ···"
HammingDistance,float32,2,genes ✕ dots,168 ✕ __,"[[0.125, 0.125, 0.0625, 0.125, 0.125, ...], [0.125, 0.12 ···"
Intensity,float32,2,genes ✕ dots,168 ✕ __,"[[0.00015259021896696422, 0.00022888532845044633, 0.0001 ···"
IntensityNormalized,float32,2,genes ✕ dots,168 ✕ __,"[[0.00015259021896696422, 0.00022888532845044633, 0.0001 ···"
OriginalCoords,float32,3,genes ✕ dots ✕ xy,168 ✕ __ ✕ 2,"[[[1978, 1718], [1678, 1319], [1288, 1107], [79, 908], [ ···"
PixelHoodSize,uint16,0,(),(),3
PixelMicrons,float32,0,(),(),0.18333299458026886


## Use napari to browse the data

In [11]:
# Open a napari viewer and add one points layer per gene.
# Requires `ws` (the dataset workspace) to have been bound by an earlier cell.
vw = napari.Viewer('gene')

# [:] means 'select all rows'
genes = ws[:].Gene
coords = ws[:].StitchedCoords
colors = ws[:].GeneColor

# Options shared by every layer; layers start hidden (visible=False)
layer_options = dict(size=20, symbol='o', visible=False)

for i, gene in enumerate(genes):
    # Format the gene's three colour values as a '#RRGGBB' hex string
    rgb = tuple(colors[i])
    hex_color = '#%02X%02X%02X' % rgb
    _ = vw.add_points(
        coords[i],
        name=gene,
        edge_color=hex_color,
        face_color=hex_color,
        **layer_options,
    )

NameError: name 'ws' is not defined

In [9]:
# Preview one random colour formatted as a '#RRGGBB' hex string.
# Depends on `r` (random int in 0..255) having been defined by another cell.
'#%02X%02X%02X' % (r(), r(), r())

'#0845EF'

In [10]:
def r():
    """Return a random integer between 0 and 255 inclusive (used as one colour channel)."""
    return random.randint(0,255)


# Show the UMAP3D embedding in napari, one points layer per cluster, each with a random colour.
with loompy.connect("/Users/stelin/Allbrain.loom", validate=False) as ds:
    # Column attribute UMAP3D, scaled by 1000
    xyz = ds.ca.UMAP3D * 1000
    vw = napari.Viewer('gene')
    for cluster in np.unique(ds.ca.Clusters):
        # Three random channels joined into a '#RRGGBB' string
        color = '#' + ''.join('%02X' % r() for _ in range(3))
        # Boolean mask selecting the cells that belong to this cluster
        in_cluster = ds.ca.Clusters == cluster
        _ = vw.add_points(
            xyz[in_cluster],
            name="UMAP3D",
            size=20,
            symbol='o',
            visible=True,
            edge_color=color,
            face_color=color,
        )

KeyboardInterrupt: 

In [57]:
# Collect the most enriched gene(s) of every cluster from the aggregated loom file
# and print the unique gene names.
# `loompy` and `np` are imported in the notebook's first cell, so the import is not repeated here.
top_genes = []
n_genes_per_cluster = 1
with loompy.connect("/Users/stelin/Allbrain.agg.loom", validate=False) as ds:
    # Read the whole "enrichment" layer and the gene names once. Neither depends on
    # the cluster, so re-reading them on every iteration was wasted work.
    enriched = ds["enrichment"][:,:]
    gene_names = ds.ra.Gene
    for cluster in np.unique(ds.ca.Clusters):
        # NOTE(review): the cluster label is used directly as a column index into the
        # layer - assumes labels are integers that match column positions; confirm.
        # Negating the scores makes argsort rank the highest enrichment first.
        top = gene_names[np.argsort(-enriched[:, cluster])][:n_genes_per_cluster]
        top_genes += list(top)
for gene in np.unique(top_genes):
    print(gene)

AC004080.1
AC007402.1
AC008060.1
AC010247.2
AC016152.1
AC022523.1
AC087477.2
AC099489.1
ACTA2
ACTC1
ADAMTS18
ADAMTS3
ADCY8
ADGRL4
ADIRF
AGBL1
AIF1
AK5
AL109930.1
AL138826.1
AL354809.1
AL591686.2
ALDH1L1
AMBN
APOD
AQP4
ASIC4
ATOH1
BARHL1
BCAS1
BEST3
BHLHE22
C1QL4
C1orf53
C5orf58
C8orf34
CA8
CALB2
CALCB
CAPS
CARTPT
CBLN4
CCBE1
CCK
CCL3
CCNA1
CCNO
CD93
CDH19
CDH9
CHRM3
CLDN5
CNPY1
CNTN2
COL19A1
COL1A1
COX6A1
CRABP1
CREG2
CRH
CRHBP
CRYAB
CTXN3
CYP26A1
CYP26B1
DBX2
DLK1
DLX1
DLX2
DLX5
DLX6-AS1
DTHD1
EDN1
EDNRA
EGFR
EML6
EN1
EOMES
ERVMER61-1
EVX1
EYA2
FAM107A
FAM19A1
FAM19A4
FEV
FEZF1
FGF17
FGF19
FLT1
FOXD3
FOXD3-AS1
FRZB
FSTL4
GABRG3
GALNTL6
GATA3
GATA3-AS1
GBX1
GDF10
GFAP
GMNC
GNG8
GNRH1
GPC3
GPC5
GRM1
GRP
GSX1
HAS2
HBB
HBM
HBZ
HCRT
HDC
HELT
HEPACAM
HEPN1
HMX2
HOTAIRM1
HOXA-AS2
HOXA2
HOXB-AS1
HOXB-AS3
HRK
HTR3A
IGFBP5
IL1RAPL2
INPP4B
IRX4
ISL1
ITM2A
KCNQ5
KIF20A
KIF26B-AS1
KIRREL2
KITLG
LAMP5
LBX1
LGR5
LHX4
LHX5
LHX6
LHX8
LINC00682
LINC00698
LINC01210
LINC01830
LINC01951
LINC01965
LINC0205