In [1]:
# Shell escape: print the current date and time when this cell is run.
!date

Fri Oct 23 07:23:08 PDT 2020


# Summary stats

In [1]:
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches as mpatches
import scanpy as sc
from scipy.stats import ks_2samp, ttest_ind
import ast
from scipy.sparse import csr_matrix
import warnings
warnings.filterwarnings('ignore')

def nd(arr):
    """Return the contents of `arr` as a flat, one-dimensional NumPy array.

    The input is first converted to a plain ``ndarray`` with ``np.asarray``
    and then reshaped to a single axis, so nested lists, scalars and
    ``np.matrix`` objects all come back with shape ``(n,)``.
    """
    as_array = np.asarray(arr)
    return np.reshape(as_array, -1)

# Base font size, applied to matplotlib's global rcParams.
fsize = 20
plt.rcParams['font.size'] = fsize
# IPython magic: set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'

In [2]:
# Read the three gene h5ad files, in order, into `ss`, `tenx` and `mfish`
# (labelled SMART-Seq, 10xv3 and MERFISH in the cells below).
ss, tenx, mfish = map(
    anndata.read_h5ad,
    (
        "../../data/notebook/revision/no_filter_gene.h5ad",
        "../../data/notebook/revision/10xv3_gene.h5ad",
        "../../data/notebook/revision/merfish-updated.h5ad",
    ),
)

# Average number of genes detected per cell

In [16]:
# Per cell, count the entries of the "norm" layer that are > 0, then average
# those counts over all cells.
print(
    "SMART-Seq {:,.0f}".format(
        np.mean((ss.layers["norm"] > 0).sum(axis=1))
    )
)

SMART-Seq 10,333


In [15]:
# Per cell, count the entries of .X that are > 0, then average over cells.
# NOTE(review): MERFISH is computed from mfish.X here (prints 72), whereas the
# "Number of detected genes per cell (average)" cell uses mfish.layers["X"]
# (prints 76) -- confirm which matrix is the intended one.
print(
    f"10xv3   {np.mean((tenx.X > 0).sum(axis=1)):,.0f}",
    f"MERFISH {np.mean((mfish.X > 0).sum(axis=1)):,.0f}",
    sep="\n",
)

10xv3   5,891
MERFISH 72


# Number of cells

In [3]:
# Number of observations (rows of each AnnData object), one line per dataset.
print(
    "SMART-Seq {:,}".format(ss.n_obs),
    "10xv3 {:,}".format(tenx.n_obs),
    "MERFISH {:,}".format(mfish.n_obs),
    sep="\n",
)

SMART-Seq 6,295
10xv3 94,162
MERFISH 280,327


# Number of genes

In [4]:
# Number of variables (columns of each AnnData object), one line per dataset.
print(
    "SMART-Seq {:,}".format(ss.n_vars),
    "10xv3 {:,}".format(tenx.n_vars),
    "MERFISH {:,}".format(mfish.n_vars),
    sep="\n",
)

SMART-Seq 31,053
10xv3 31,053
MERFISH 254


# Number of detected genes per cell (average)

In [5]:
# Per cell, count the entries that are > 0, then average over cells.
# SMART-Seq and MERFISH read the "X" layer; 10xv3 reads .X directly.
# NOTE(review): an earlier cell computes the MERFISH figure from mfish.X and
# prints 72 instead of the 76 shown here -- confirm which matrix is intended.
print(
    "SMART-Seq {:,.0f}".format(np.mean((ss.layers["X"] > 0).sum(axis=1))),
    "10xv3 {:,.0f}".format(np.mean((tenx.X > 0).sum(axis=1))),
    "MERFISH {:,.0f}".format(np.mean((mfish.layers["X"] > 0).sum(axis=1))),
    sep="\n",
)

SMART-Seq 10,333
10xv3 5,891
MERFISH 76


# Number of clusters

In [6]:
# Cluster level: number of distinct values in each dataset's cluster column
# (`cluster_label` for SMART-Seq and 10xv3, `label` for MERFISH).
print(
    "SMART-Seq {:,}".format(ss.obs["cluster_label"].nunique()),
    "10xv3 {:,}".format(tenx.obs["cluster_label"].nunique()),
    "MERFISH {:,}".format(mfish.obs["label"].nunique()),
    sep="\n",
)

SMART-Seq 62
10xv3 147
MERFISH 99


In [15]:
# Subclass level: number of distinct values in each dataset's subclass column
# (`subclass_label` for SMART-Seq and 10xv3, `subclass` for MERFISH).
print(
    "SMART-Seq {:,}".format(ss.obs["subclass_label"].nunique()),
    "10xv3 {:,}".format(tenx.obs["subclass_label"].nunique()),
    "MERFISH {:,}".format(mfish.obs["subclass"].nunique()),
    sep="\n",
)

SMART-Seq 18
10xv3 22
MERFISH 25


In [18]:
# Class level: number of distinct `class_label` values in each dataset.
print(
    "SMART-Seq {:,}".format(ss.obs["class_label"].nunique()),
    "10xv3 {:,}".format(tenx.obs["class_label"].nunique()),
    "MERFISH {:,}".format(mfish.obs["class_label"].nunique()),
    sep="\n",
)

SMART-Seq 4
10xv3 4
MERFISH 3


# Reads processed

In [7]:
# Hard-coded SMART-Seq read total, shown with thousands separators.
"SMART-Seq {:,} reads".format(15_229_289_828)

'SMART-Seq 15,229,289,828 reads'

In [8]:
# Hard-coded 10x read counts (twelve entries); summed in the following cells.
tenx_reads = [
    1_048_408_446,
    1_466_307_916,
    2_941_873_323,
    1_152_751_524,
    1_708_764_205,
    1_926_459_540,
    1_600_417_861,
    1_897_698_358,
    1_919_010_597,
    2_247_342_604,
    2_465_213_703,
    2_321_988_388,
]

In [9]:
# Total of all entries in `tenx_reads`, shown with thousands separators.
"10x: {:,} reads".format(sum(tenx_reads))

'10x: 22,696,236,465 reads'

# Reads per cell

In [10]:
# Hard-coded SMART-Seq read total divided by the number of SMART-Seq cells.
"SMART-Seq {:,.0f} reads per cell".format(15_229_289_828 / ss.n_obs)

'SMART-Seq 2,419,268 reads per cell'

In [11]:
# Summed 10x read counts divided by the number of 10xv3 cells.
"10x {:,.0f} reads per cell".format(sum(tenx_reads) / tenx.n_obs)

'10x 241,034 reads per cell'

In [12]:
# Ratio of SMART-Seq reads per cell to 10xv3 reads per cell.
"SMART-Seq was sequenced {:,.0f}x deeper per cell than 10xv3.".format(
    (15_229_289_828 / ss.n_obs) / (sum(tenx_reads) / tenx.n_obs)
)

'SMART-Seq was sequenced 10x deeper per cell than 10xv3.'

# Isoform

In [13]:
# Read the SMART-Seq isoform h5ad file into `ss_iso`.
ss_iso = anndata.read_h5ad(
    filename="../../data/notebook/revision/no_filter_isoform.h5ad",
)

In [14]:
# Per cell, count the entries of the isoform "X" layer that are > 0, then
# average those counts over all cells.
print(
    "SMART-Seq {:,.0f}".format(
        np.mean((ss_iso.layers["X"] > 0).sum(axis=1))
    )
)

SMART-Seq 20,319


In [33]:
# For each MERFISH `subclass`, count the distinct `label` values it contains.
mfish.obs["label"].groupby(mfish.obs["subclass"]).nunique()

subclass
Astrocytes           3
Endothelial          1
L5_IT                4
L5_PT                5
L6_CT                9
L6_IT                3
L6_IT_Car3           1
L6b                  3
L23_IT               5
L45_IT               7
L56_NP               2
Lamp5                8
Microglia            1
OPC                  1
Oligodendrocytes     3
PVM                  2
Pericytes            1
Pvalb               12
SMC                  1
Sncg                 2
Sst                  8
Sst_Chodl            1
VLMC                 1
Vip                 11
other                4
Name: label, dtype: int64