In [1]:
import os

import anndata
import numpy as np

from scquint.differential_splicing import run_differential_splicing

In [2]:
# Tabula Muris intron count matrix can be downloaded at
# https://figshare.com/articles/dataset/scQuint_data_objects_-_Tabula_Muris/14471904?file=27696714
#
# The location of the downloaded file is machine-specific. Set the
# SCQUINT_ADATA_PATH environment variable to point at your local copy;
# when it is unset, the original author's path is used as the default.
adata_path = os.environ.get(
    "SCQUINT_ADATA_PATH",
    "/home/gbenegas/Desktop/tabula_muris/adata_spl.h5ad",
)
adata = anndata.read_h5ad(adata_path)
adata

AnnData object with n_obs × n_vars = 44518 × 29965
    obs: 'FACS.selection', 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'sex', 'subtissue', 'tissue', 'n_genes', 'n_counts', 'louvain', 'leiden', 'cell_type', 'plate_id'
    var: 'chromosome', 'start', 'end', 'cluster', 'left_exon_count', 'left_genes', 'right_exon_count', 'right_genes', 'genes', 'n_genes', 'genes_cluster', 'n_genes_cluster', 'annotated', 'gene_id', 'original_cluster', 'n_start', 'strand', 'cluster_type'

In [3]:
# Ten most frequent cell ontology classes among cells annotated as heart tissue
heart_obs = adata.obs.loc[adata.obs["tissue"] == "Heart"]
heart_obs["cell_ontology_class"].value_counts().head(10)

fibroblast of cardiac tissue           2118
endothelial cell of coronary artery    1179
monocyte                                490
endocardial cell                        164
atrial myocyte                          159
valve cell                              158
smooth muscle cell                       65
ventricular myocyte                      46
T cell                                   37
B cell                                   15
Name: cell_ontology_class, dtype: int64

In [4]:
# Positional (row) indices of the two cell groups to compare:
# group A = heart endocardial cells, group B = heart atrial myocytes.
heart_mask = adata.obs["tissue"] == "Heart"
ontology_class = adata.obs["cell_ontology_class"]
cell_idx_a = np.flatnonzero((heart_mask & (ontology_class == "endocardial cell")).to_numpy())
cell_idx_b = np.flatnonzero((heart_mask & (ontology_class == "atrial myocyte")).to_numpy())
(cell_idx_a.size, cell_idx_b.size)

(164, 159)

In [5]:
# Run the differential splicing test between group A and group B.
# Two tables come back: `clusters` (one row per tested intron cluster)
# and `introns` (one row per intron in those clusters).
# NOTE(review): the two `min_*` arguments presumably set the minimum-cell
# filtering thresholds named in the log output below — confirm against the
# scquint documentation.
clusters, introns = run_differential_splicing(
    adata,
    cell_idx_a,
    cell_idx_b,
    min_cells_per_cluster=50,
    min_total_cells_per_intron=50,
    n_jobs=1,
)

Trying to set attribute `.var` of view, copying.
Trying to set attribute `.var` of view, copying.
Trying to set attribute `.var` of view, copying.
Trying to set attribute `.var` of view, copying.
Trying to set attribute `.var` of view, copying.
Trying to set attribute `.var` of view, copying.


sample sizes:  164 159
filter_min_cells_per_feature
filter_singletons
filter_min_cells_per_cluster
filter_singletons
filter_min_cells_per_cluster
filter_singletons
Number of intron clusters:  84
Number of introns:  169
Testing intron cluster  0


  allow_unreachable=True)  # allow_unreachable flag


Done


In [6]:
# `clusters` holds one row per intron cluster that was tested;
# preview its first five rows.
clusters.head(n=5)

Unnamed: 0_level_0,p_value,ll_null,ll,n_classes,gene_id,max_abs_delta_psi,max_abs_lfc_psi,ranking,p_value_adj
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1269,4.19845e-47,-358.7117348210924,-254.82372878310213,2,ENSMUSG00000061904,0.798296,8.692238,0,3.5266980000000003e-45
12347,5.336709999999999e-41,-304.4631111680536,-214.55895498416908,2,ENSMUSG00000032366,0.797576,3.716576,1,2.241418e-39
6211,4.635299e-39,-452.0730904495759,-366.60815780882535,2,ENSMUSG00000025781,0.717983,2.737905,2,1.297884e-37
11316,1.163422e-36,-250.6195541833696,-170.64721919349435,2,ENSMUSG00000008892,0.792927,5.902056,3,2.443186e-35
15076,1.65341e-30,-118.71946751177695,-52.81864338786248,2,ENSMUSG00000018845,0.893422,5.195932,4,2.777729e-29


In [7]:
# `introns` holds one row per intron belonging to the tested clusters;
# preview its first five rows.
introns.head(n=5)

Unnamed: 0,chromosome,start,end,cluster,left_exon_count,left_genes,right_exon_count,right_genes,genes,n_genes,...,original_cluster,n_start,strand,cluster_type,psi_a,psi_b,delta_psi,lfc_psi,abs_delta_psi,abs_lfc_psi
0,chr10,91119707,91121922,1269,9,['ENSMUSG00000061904'],5,['ENSMUSG00000061904'],['ENSMUSG00000061904'],1,...,1269,1,-,shared_acceptor,0.998065,0.199769,0.798296,2.3208,0.798296,2.3208
1,chr10,91119707,91122207,1269,9,['ENSMUSG00000061904'],2,['ENSMUSG00000061904'],['ENSMUSG00000061904'],1,...,1269,1,-,shared_acceptor,0.001935,0.800231,-0.798296,-8.692238,0.798296,8.692238
2,chr10,128491034,128491720,1417,11,['ENSMUSG00000090841'],1,['ENSMUSG00000090841'],['ENSMUSG00000090841'],1,...,1417,1,-,shared_acceptor,0.765274,0.207626,0.557649,1.881993,0.557649,1.881993
3,chr10,128491034,128492059,1417,11,['ENSMUSG00000090841'],14,['ENSMUSG00000090841'],['ENSMUSG00000090841'],1,...,1417,1,-,shared_acceptor,0.234726,0.792374,-0.557649,-1.755205,0.557649,1.755205
4,chr11,4702221,4703904,1472,4,['ENSMUSG00000059534'],2,['ENSMUSG00000059534'],['ENSMUSG00000059534'],1,...,1472,1,-,shared_acceptor,0.033252,0.104812,-0.071559,-1.656266,0.071559,1.656266


In [8]:
# Count the intron clusters whose adjusted p-value falls below 0.05,
# i.e. the number of significant differential splicing events.
clusters["p_value_adj"].lt(0.05).sum()

45

In [9]:
# The introns belonging to the intron cluster with the lowest p-value.
# The cluster id is looked up from `clusters` (whose index is the cluster id)
# rather than hard-coded, so this cell stays correct if the data, filtering
# thresholds or model fit change. For the run shown here it resolves to 1269.
top_cluster = clusters["p_value"].idxmin()
introns[introns["cluster"] == top_cluster]

Unnamed: 0,chromosome,start,end,cluster,left_exon_count,left_genes,right_exon_count,right_genes,genes,n_genes,...,original_cluster,n_start,strand,cluster_type,psi_a,psi_b,delta_psi,lfc_psi,abs_delta_psi,abs_lfc_psi
0,chr10,91119707,91121922,1269,9,['ENSMUSG00000061904'],5,['ENSMUSG00000061904'],['ENSMUSG00000061904'],1,...,1269,1,-,shared_acceptor,0.998065,0.199769,0.798296,2.3208,0.798296,2.3208
1,chr10,91119707,91122207,1269,9,['ENSMUSG00000061904'],2,['ENSMUSG00000061904'],['ENSMUSG00000061904'],1,...,1269,1,-,shared_acceptor,0.001935,0.800231,-0.798296,-8.692238,0.798296,8.692238


In [10]:
# The results can be visualized with 
# the cellxgene browser https://scquint.ds.czbiohub.org/tabula-muris/
# and UCSC Genome Browser https://genome.ucsc.edu/s/gbenegas/tabulamuris