In [1]:
%gui qt5
import napari
import pandas as pd
import numpy as np
import random
import shoji
import loompy

## Connect to the shoji database

In [2]:
# Open the connection to the shoji database; `db` is the root handle used by the cells below.
# NOTE(review): connect() is called without arguments, so the connection settings presumably
# come from shoji's own configuration — confirm where that is defined.
db = shoji.connect()

In [3]:
# Display the `mouse_development_agg` workspace (rendered as a dimensions table and a tensors table).
db.refdb.mouse_development_agg

Unnamed: 0,shape,length
cells,,942
genes,31053.0,31053

Unnamed: 0,dtype,rank,dims,shape,(values)
Accession,string,1,genes,31053,"[""ENSMUSG00000018474"", ""ENSMUSG00000030324"", ""ENSMUSG000 ···"
Age_e10_0,int64,1,cells,__,"[0, 0, 0, 0, 0, ...]"
Age_e11_0,int64,1,cells,__,"[0, 0, 0, 0, 0, ...]"
Age_e12_0,int64,1,cells,__,"[0, 15, 0, 0, 12, ...]"
Age_e12_5,int64,1,cells,__,"[0, 0, 0, 0, 0, ...]"
Age_e13_0,int64,1,cells,__,"[0, 18, 0, 0, 11, ...]"
Age_e13_5,int64,1,cells,__,"[0, 11, 0, 0, 10, ...]"
Age_e14_0,int64,1,cells,__,"[0, 42, 0, 0, 19, ...]"
Age_e14_5,int64,1,cells,__,"[0, 0, 0, 0, 0, ...]"
Age_e15_0,int64,1,cells,__,"[0, 61, 0, 0, 15, ...]"


In [5]:
# Rebuild the reference-database tree from scratch.
# WARNING: the first statement removes the existing `refdb` workspace (presumably together
# with everything stored in it — confirm before re-running this cell).
del db.refdb
db.refdb = shoji.Workspace()

# One empty child workspace per reference dataset, created in this order.
for child in (
    "mouse_development",
    "mouse_development_agg",
    "mouse_adolescent",
    "mouse_adolescent_agg",
    "human",
    "human_development",
    "human_development_agg",
    "human_adult",
    "human_adult_agg",
):
    setattr(db.refdb, child, shoji.Workspace())

In [11]:
# Import the contents of a loom file into the `mouse_development_agg` workspace.
# NOTE(review): `_from_loom` is underscore-prefixed, so it is presumably not public shoji API —
# confirm it is the intended entry point. The absolute path only exists on one machine.
db.refdb.mouse_development_agg._from_loom("/Users/stelin/cytograph/mouse_dev/data/dev_all.agg.loom", fix_expression_dtype=True)

## Create a workspace to hold the EEL data

In [12]:
# Per-gene tensors of an EEL dataset, as (tensor name, dtype, dims), in creation order.
EEL_TENSOR_SPECS = (
    ("StitchedCoords",      "float32", ("genes", "dots", "xy")),
    ("RegisteredCoords",    "float32", ("genes", "dots", "xy")),
    ("OriginalCoords",      "float32", ("genes", "dots", "xy")),
    ("Shift",               "float32", ("genes", "dots", "xy")),
    ("RawBarcode",          "bool",    ("genes", "dots", "bits")),
    ("Gene",                "string",  ("genes", )),
    ("GeneColor",           "uint8",   ("genes", 3)),                 # RGB values for genes
    ("DotID",               "string",  ("genes", "dots")),
    ("HammingDistance",     "float32", ("genes", "dots")),
    ("IntensityNormalized", "float32", ("genes", "dots")),
    ("Intensity",           "float32", ("genes", "dots")),
    ("SelectedThreshold",   "float32", ("genes", "dots")),
    ("FOV",                 "uint16",  ("genes", "dots")),
    ("Round",               "uint16",  ("genes", "dots")),
)

# Reduced schema for datasets that only carry stitched coordinates and gene labels.
EEL_COORDS_ONLY_TENSORS = ("StitchedCoords", "Gene", "GeneColor")


def create_eel_workspace(parent, name, tensor_names=None):
    """Create the EEL dataset workspace `name` under `parent` unless it already exists.

    The new workspace gets the dimensions `genes`, `dots`, `xy` and `bits`, the scalar
    tensors `PixelMicrons` and `PixelHoodSize`, and the per-gene tensors listed in
    EEL_TENSOR_SPECS.

    Args:
        parent:        Workspace that will contain the dataset (here: `db.eel`).
        name:          Name of the dataset workspace to create.
        tensor_names:  Optional collection of names from EEL_TENSOR_SPECS to create;
                       None (the default) creates all of them.

    Returns:
        None. An existing workspace called `name` is left completely untouched.
    """
    if name in parent:
        return

    # Create a workspace for the dataset
    setattr(parent, name, shoji.Workspace())
    ws = getattr(parent, name)

    # Create some dimensions
    ws.genes = shoji.Dimension(shape=None)   # None means jagged or variable-length
    ws.dots = shoji.Dimension(shape=None)    # None means jagged
    ws.xy = shoji.Dimension(shape=2)         # 2 spatial coordinates
    ws.bits = shoji.Dimension(shape=16)      # 16 bits barcode length

    # Create the scalar tensors; dims=() means scalar
    ws.PixelMicrons = shoji.Tensor("float32", dims=(), inits=0.183333)
    ws.PixelHoodSize = shoji.Tensor("uint16", dims=(), inits=3)

    # Create the per-gene tensors. A fresh Tensor object is built for every workspace.
    for tensor_name, dtype, dims in EEL_TENSOR_SPECS:
        if tensor_names is None or tensor_name in tensor_names:
            setattr(ws, tensor_name, shoji.Tensor(dtype, dims))


if "eel" not in db:
    db.eel = shoji.Workspace()

create_eel_workspace(db.eel, "mouse_coronal")
create_eel_workspace(db.eel, "mouse_sagittal")
create_eel_workspace(db.eel, "mouse_sagittal_20201001", tensor_names=EEL_COORDS_ONLY_TENSORS)

## Load data into the workspace

In [4]:
def r():
    """Return a random integer from 0 to 255 (both included); used as one colour channel."""
    return random.randint(0, 255)


ws = db.eel.mouse_coronal

stitched_coords = pd.read_parquet('/Users/stelin/lars_viz_counts/20200814_stitched_coords_perfect_match_max_2_miss.parquet')
gene_grp = stitched_coords.groupby('gene')

# Append one row per gene along the `genes` dimension, filling every tensor at once.
# (Appending many rows per call would be *much* more efficient, but the input is organised per gene.)
# NOTE(review): the output recorded for this cell is "TypeError: object of type 'Dimension' has no len()"
# with no traceback shown — confirm on a fresh run that the cell completes.
for gene, coords in gene_grp:
    row = {
        # [None] adds the leading axis of length 1 (a single gene row)
        "StitchedCoords":      coords[['c_stitched_coords', 'r_stitched_coords']].values[None],
        "RegisteredCoords":    coords[['c_px_registered', 'r_px_registered']].values[None],
        "OriginalCoords":      coords[['c_px_original', 'r_px_original']].values[None],
        "Shift":               coords[['c_shift', 'r_shift']].values[None],
        # Convert the bytes objects to bool arrays
        "RawBarcode":          np.array([
            np.array([bit == 1 for bit in barcode])
            for barcode in coords['raw_barcodes']
        ])[None],
        # Note: must convert string numpy arrays to object arrays
        "Gene":                np.array(gene, dtype="object")[None],
        "GeneColor":           np.array([[r(), r(), r()]], dtype="uint8"),
        # .T turns the single-column frame into one row of per-dot values
        "DotID":               coords[['dot_id']].values.T,
        "HammingDistance":     coords[['hamming_distance_barcode']].values.T,
        "IntensityNormalized": coords[['dot_intensity_norm']].values.T,
        "Intensity":           coords[['dot_intensity_not']].values.T,
        "SelectedThreshold":   coords[['selected_thr']].values.T,
        "FOV":                 coords[['fov_num']].values.T,
        "Round":               coords[['round_num']].values.T,
    }
    ws.genes.append(row)

ws.genes = shoji.Dimension(shape=ws.genes.length)  # Fix the genes dimension

TypeError: object of type 'Dimension' has no len()

In [26]:
def r():
    """Return a random integer from 0 to 255 (both included); used as one colour channel."""
    return random.randint(0, 255)


ws = db.eel.mouse_sagittal

stitched_coords = pd.read_parquet('/Users/stelin/20200826_stitched_coords_perfect_match_max_2_miss.parquet')
gene_grp = stitched_coords.groupby('gene')

# Append one row per gene along the `genes` dimension, filling every tensor at once.
# (Appending many rows per call would be *much* more efficient, but the input is organised per gene.)
for gene, coords in gene_grp:
    row = {
        # [None] adds the leading axis of length 1 (a single gene row)
        "StitchedCoords":      coords[['c_stitched_coords', 'r_stitched_coords']].values[None],
        "RegisteredCoords":    coords[['c_px_registered', 'r_px_registered']].values[None],
        "OriginalCoords":      coords[['c_px_original', 'r_px_original']].values[None],
        "Shift":               coords[['c_shift', 'r_shift']].values[None],
        # Convert the bytes objects to bool arrays
        "RawBarcode":          np.array([
            np.array([bit == 1 for bit in barcode])
            for barcode in coords['raw_barcodes']
        ])[None],
        # Note: must convert string numpy arrays to object arrays
        "Gene":                np.array(gene, dtype="object")[None],
        "GeneColor":           np.array([[r(), r(), r()]], dtype="uint8"),
        # .T turns the single-column frame into one row of per-dot values
        "DotID":               coords[['dot_id']].values.T,
        "HammingDistance":     coords[['hamming_distance_barcode']].values.T,
        "IntensityNormalized": coords[['dot_intensity_norm']].values.T,
        "Intensity":           coords[['dot_intensity_not']].values.T,
        "SelectedThreshold":   coords[['selected_thr']].values.T,
        "FOV":                 coords[['fov_num']].values.T,
        "Round":               coords[['round_num']].values.T,
    }
    ws.genes.append(row)

ws.genes = shoji.Dimension(shape=ws.genes.length)  # Fix the genes dimension

In [13]:
def r():
    """Return a random integer from 0 to 255 (both included); used as one colour channel."""
    return random.randint(0, 255)


ws = db.eel.mouse_sagittal_20201001

stitched_coords = pd.read_parquet("/Users/stelin/Dropbox (Linnarsson Group)/linnarsson group/Projects/EEL/Data/20201001_stitched_coords.parquet")
gene_grp = stitched_coords.groupby('gene')

# Append one row per gene along the `genes` dimension; only the stitched coordinates,
# the gene name and a random colour are stored for this dataset.
# (Appending many rows per call would be *much* more efficient, but the input is organised per gene.)
for gene, coords in gene_grp:
    row = {
        # [None] adds the leading axis of length 1 (a single gene row)
        "StitchedCoords": coords[['c_stitched_coords', 'r_stitched_coords']].values[None],
        # Note: must convert string numpy arrays to object arrays
        "Gene":           np.array(gene, dtype="object")[None],
        "GeneColor":      np.array([[r(), r(), r()]], dtype="uint8"),
    }
    ws.genes.append(row)

ws.genes = shoji.Dimension(shape=ws.genes.length)  # Fix the genes dimension

## Show the workspace

In [14]:
# Display the workspace: one table for its dimensions and one for its tensors.
db.eel.mouse_sagittal_20201001

Unnamed: 0,shape,length
bits,16.0,0
dots,,0
genes,168.0,168
xy,2.0,0

Unnamed: 0,dtype,rank,dims,shape,(values)
Gene,string,1,genes,168,"[""Abi3bp"", ""Acta2"", ""Adora2a"", ""Afp"", ""Agt"", ...]"
GeneColor,uint8,2,genes ✕ 3,168 ✕ 3,"[[210, 172, 144], [215, 121, 237], [190, 32, 238], [141, ···"
PixelHoodSize,uint16,0,(),(),3
PixelMicrons,float32,0,(),(),0.18333299458026886
StitchedCoords,float32,3,genes ✕ dots ✕ xy,168 ✕ __ ✕ 2,"[[[-7185.324804192235, 18215.40211980093], [-6662.324804 ···"


## Use napari to browse the data

In [18]:
ws = db.eel.mouse_sagittal

vw = napari.Viewer('gene')
genes = ws[:].Gene              # [:] means 'select all rows'
coords = ws[:].StitchedCoords
colors = ws[:].GeneColor

# Genes whose layers are switched on when the viewer opens; every other gene is added
# hidden and can be toggled in the napari layer list.
# (This list used to be defined but ignored: every layer was added with visible=False.)
visible = ["Dcn", "Neurod6", "Sox10", "Gdf10", "Aqp4", "Wfs1", "Tnr", "Cplx3"]

# Add one points layer per gene, drawn in that gene's stored RGB colour.
for i, gene in enumerate(genes):
    xy = coords[i]
    # Flip the second coordinate column.
    # NOTE(review): the flip uses this gene's own maximum, so each gene is mirrored about a
    # different line; if the layers must stay aligned with each other, a single maximum
    # shared by all genes is presumably needed — confirm.
    xy[:, 1] = np.max(xy[:, 1]) - xy[:, 1]
    col = '#%02X%02X%02X' % tuple(colors[i])   # RGB triple -> '#RRGGBB'
    _ = vw.add_points(xy, name=gene, size=10, symbol='o', visible=gene in visible, edge_color=col, face_color=col)

In [57]:
# Collect the most enriched gene(s) of every cluster and print the unique gene names.
# (loompy is already imported in the first cell of the notebook.)
top_genes = []
n_genes_per_cluster = 1
with loompy.connect("/Users/stelin/Allbrain.agg.loom", validate=False) as ds:
    # Read the whole "enrichment" layer once; it was previously re-read for every cluster.
    enriched = ds["enrichment"][:, :]
    gene_names = ds.ra.Gene
    for cluster in np.unique(ds.ca.Clusters):
        # NOTE(review): the cluster label is used directly as a column index, which assumes
        # column k of the file holds cluster k — confirm.
        # Sorting the negated scores puts the highest enrichment first.
        top = gene_names[np.argsort(-enriched[:, cluster])][:n_genes_per_cluster]
        top_genes += list(top)
for gene in np.unique(top_genes):
    print(gene)

AC004080.1
AC007402.1
AC008060.1
AC010247.2
AC016152.1
AC022523.1
AC087477.2
AC099489.1
ACTA2
ACTC1
ADAMTS18
ADAMTS3
ADCY8
ADGRL4
ADIRF
AGBL1
AIF1
AK5
AL109930.1
AL138826.1
AL354809.1
AL591686.2
ALDH1L1
AMBN
APOD
AQP4
ASIC4
ATOH1
BARHL1
BCAS1
BEST3
BHLHE22
C1QL4
C1orf53
C5orf58
C8orf34
CA8
CALB2
CALCB
CAPS
CARTPT
CBLN4
CCBE1
CCK
CCL3
CCNA1
CCNO
CD93
CDH19
CDH9
CHRM3
CLDN5
CNPY1
CNTN2
COL19A1
COL1A1
COX6A1
CRABP1
CREG2
CRH
CRHBP
CRYAB
CTXN3
CYP26A1
CYP26B1
DBX2
DLK1
DLX1
DLX2
DLX5
DLX6-AS1
DTHD1
EDN1
EDNRA
EGFR
EML6
EN1
EOMES
ERVMER61-1
EVX1
EYA2
FAM107A
FAM19A1
FAM19A4
FEV
FEZF1
FGF17
FGF19
FLT1
FOXD3
FOXD3-AS1
FRZB
FSTL4
GABRG3
GALNTL6
GATA3
GATA3-AS1
GBX1
GDF10
GFAP
GMNC
GNG8
GNRH1
GPC3
GPC5
GRM1
GRP
GSX1
HAS2
HBB
HBM
HBZ
HCRT
HDC
HELT
HEPACAM
HEPN1
HMX2
HOTAIRM1
HOXA-AS2
HOXA2
HOXB-AS1
HOXB-AS3
HRK
HTR3A
IGFBP5
IL1RAPL2
INPP4B
IRX4
ISL1
ITM2A
KCNQ5
KIF20A
KIF26B-AS1
KIRREL2
KITLG
LAMP5
LBX1
LGR5
LHX4
LHX5
LHX6
LHX8
LINC00682
LINC00698
LINC01210
LINC01830
LINC01951
LINC01965
LINC0205

In [18]:
import loompy

# Print the rows of the main matrix (all columns) for a few selected genes.
wanted_genes = ["Actb", "Gapdh", "Sox2"]
with loompy.connect("/Users/stelin/cytograph/20191003/data/l5_all.agg.loom") as ds:
    row_mask = np.isin(ds.ra.Gene, wanted_genes)   # True for rows whose Gene attribute is wanted
    print(ds[row_mask, :])

[[2.60059172e+01 1.67628866e+01 9.63559322e+00 8.81853282e+00
  1.08774194e+01 1.25806452e+01 1.19548387e+01 2.00560748e+01
  1.45714286e+01 1.50061350e+01 5.14787339e+00 5.28518519e+00
  6.57709251e+00 5.98273736e+00 5.16696113e+00 4.99135447e+00
  1.26734694e+01 6.95402920e+00 4.19019079e+00 4.11156352e+00
  2.89856569e+00 2.96563147e+00 4.22580645e+00 5.91160221e+00
  8.03543307e+00 3.71540470e+00 1.09174312e+01 5.63809524e+00
  5.64367816e+00 7.84722222e+00 1.01562500e+01 4.72043011e+00
  3.16393443e+00 2.79365079e+00 4.35240964e+00 3.36951983e+00
  6.05785124e+00 5.69464899e+00 4.01913265e+00 4.85540335e+00
  5.21348315e+00 4.81021898e+00 3.09070958e+00 6.57142857e+00
  4.23622047e+00 6.37500000e+00 6.31986143e+00 4.64002732e+00
  5.12727273e+00 3.38601036e+00 2.57920792e+00 3.48355585e+00
  3.85708419e+00 5.02581756e+00 4.59242424e+00 5.71071429e+00
  4.37037037e+00 1.30357143e+00 1.48552339e+00 1.60000000e+00
  5.63235294e+00 4.56140351e+00 1.61550388e+00 4.60431655e+00
  4.0368

In [44]:
# Create an empty workspace at the top level of the database, using item syntax for the name.
db["my_workspace"] = shoji.Workspace()

In [45]:
# Display the top-level listing of the database (one line of contents per workspace).
db

Unnamed: 0,Contents
eel,"2 workspaces, 0 dimensions, 0 tensors"
images,"0 workspaces, 1 dimensions, 1 tensors"
my_workspace,"0 workspaces, 0 dimensions, 0 tensors"
refdb,"3 workspaces, 0 dimensions, 0 tensors"
scRNA,"0 workspaces, 2 dimensions, 3 tensors"
test,"0 workspaces, 2 dimensions, 42 tensors"


In [46]:
# NOTE: this call fails; the recorded output is
# "AttributeError: 'WorkspaceManager' object has no attribute 'Workspace'".
# Workspaces are created by assigning shoji.Workspace() to a name,
# e.g. db["my_workspace"] = shoji.Workspace().
db.Workspace()

AttributeError: 'WorkspaceManager' object has no attribute 'Workspace'

In [36]:
# Encode a UTF-8 message as a string of DNA letters, four letters per byte.
msg = bytes("Sten Linnarsson Razieh Karamzadeh David Fernández Garcia", "utf8")

# Lookup table: byte value -> 4-letter string, one letter per 2-bit group, most significant first.
# NOTE(review): the top two bits are looked up in "CATG" while the other three groups use
# "ACGT" — confirm the different alphabet for the first position is intentional.
translation = {
    x: "CATG"[(x >> 6) & 0b11] + "ACGT"[(x >> 4) & 0b11] + "ACGT"[(x >> 2) & 0b11] + "ACGT"[x & 0b11]
    for x in range(256)
}

# Iterating over a bytes object yields ints, which index the table directly.
encoded = "".join(translation[x] for x in msg)

# Number of G and C letters in the encoded sequence.
gc = encoded.count("G") + encoded.count("C")
print(encoded, len(encoded), gc)

ACATATCAAGCCAGTGCGAAAATAAGGCAGTGAGTGAGACATAGATATATATAGTTAGTGCGAAACAGAGACATGGAGGCAGCCAGGACGAAAAGTAGACATAGAGACAGTCATGGAGACAGCAAGCCAGGACGAAAACAAGACATCGAGGCAGCACGAAAACGAGCCATAGAGTGGAATTGACAGTGAGCAAGCCATGGCGAAAACTAGACATAGAGATAGGCAGAC 228 105


In [None]:
# Scratch cell (never executed): appears to be the encoded sequence printed by the encoding
# cell, wrapped onto two lines. Run as Python these lines are bare names and would raise
# NameError.
ACATATCAAGCCAGTGCGAAAATAAGGCAGTGAGTGAGACATAGATATATATAGTTAGTGCGAAACAGAGACATGGAGGCAGCCAGGACGAAAAGTAGACATAGAGACAGTCAT
GGAGACAGCAAGCCAGGACGAAAACAAGACATCGAGGCAGCACGAAAACGAGCCATAGAGTGGAATTGACAGTGAGCAAGCCATGGCGAAAACTAGACATAGAGATAGGCAGAC

In [47]:
# The passage to measure, kept verbatim (including its two line breaks).
text = """During the course of a study of the growth of human adenoid tissue in roller tube culture, a characteristic degeneration has been encountered which has been found to be serially transmissible in other tissue cultures.
From the present evidence it appears that an unidentified, possibly new, tissue culture cytopathogenic agent has been isolated repeatedly from human adenoids undergoing spontaneous degeneration in tissue culture.
The filterability and the inability to cultivate the agent on bacteriological media and to demonstrate organisms in stained tissue culture preparations would indicate that the agent belongs to the group of viruses or rickettsiae."""

# Four times the number of characters in the passage.
# NOTE(review): presumably the factor 4 is the four DNA letters per byte of the encoding
# cell — confirm. len() counts characters, not UTF-8 bytes.
len(text) * 4

2640