# gssnng
Gene Set Scoring on the Nearest Neighbor Graph (gssnng) for single-cell RNA-seq (scRNA-seq).
Works with AnnData objects stored as h5ad files. Takes values from adata.X.

The method works by sampling nearest neighbors for each cell, creating a mini-pseudobulk expression profile, and performing single-sample gene set scoring. This gives each cell a score and preserves gradients across clusters.

https://github.com/Gibbsdavidl/gssnng

Notebook author: David L Gibbs (david.gibbs@isbscience.org)

In [None]:
# First, install the gssnng package directly from GitHub.
# Then clone the repository as well: the example data files used below
# (under gssnng/test/data) are read from this local clone.
!pip install git+https://github.com/Gibbsdavidl/gssnng
!git clone https://github.com/Gibbsdavidl/gssnng

In [None]:
from gssnng import score_cells
import scanpy as sc
import matplotlib
%matplotlib inline

In [None]:
## !! These paths are set up for a Google Colab notebook !! ##
# Both files live inside the repository cloned in the install cell above.
anndata_file = '/content/gssnng/gssnng/test/data/pbmc3k_processed.h5ad'
gene_set_file = '/content/gssnng/gssnng/test/data/cibersort_lm22.gmt'

# ! Note the direction label (the '.up' suffix) on every gene set name ! #
# This list is used later to drop the score columns from q.obs between runs.
gene_set_names = [
    'B.cells.naive.up',
    'B.cells.memory.up',
    'Plasma.cells.up',
    'T.cells.CD8.up',
    'T.cells.CD4.naive.up',
    'T.cells.CD4.memory.resting.up',
    'T.cells.CD4.memory.activated.up',
    'T.cells.follicular.helper.up',
    'T.cells.regulatory..Tregs.up',
    'T.cells.gamma.delta.up',
    'NK.cells.resting.up',
    'NK.cells.activated.up',
    'Monocytes.up',
    'Macrophages.M0.up',
    'Macrophages.M1.up',
    'Macrophages.M2.up',
    'Dendritic.cells.resting.up',
    'Dendritic.cells.activated.up',
    'Mast.cells.resting.up',
    'Mast.cells.activated.up',
    'Eosinophils.up',
    'Neutrophils.up',
]

In [None]:
# Read in the 10x Genomics example data set (the same one used in the
# scanpy tutorials) from the h5ad file path configured above.
q = sc.read_h5ad(anndata_file)

# Recompute the nearest neighbor graph to give plenty of neighbors to each cell.
# n_neighbors=32 leaves headroom over the samp_neighbors=29 requested by the
# scoring calls below.
sc.pp.neighbors(q, n_neighbors=32)

In [None]:
# Produce a gene set score, for each cell, for each gene set in the .gmt file.
# Neighbors are sampled within the groups defined by the groupby parameter.
# Groups run in parallel; set the cores parameter as appropriate for your machine.

score_cells.with_gene_sets(
        adata=q,
        gene_set_file=gene_set_file,
        groupby="louvain",  # obs column used to group cells for neighbor sampling
        smooth_mode='connectivity',
        recompute_neighbors=0,  # presumably keeps the graph built above -- confirm in gssnng docs
        score_method="singscore",
        method_params={'normalization':'theoretical'},  # options passed for the chosen score_method
        samp_neighbors=29,
        ranked=True,
        cores=6  # parallelism across groups; adjust to the available cores
    )


In [None]:
# Each gene set score is saved as a column in the AnnData .obs data frame;
# display it to inspect the new columns.
q.obs

In [None]:
# Visualize two of the gene set scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up','B.cells.naive.up','louvain'], wspace=0.1, colorbar_loc=None) 

---

###########  THE TOUR OF METHODS #################

---

In [None]:
### RANK BIASED OVERLAP ###
### https://dl.acm.org/doi/10.1145/1852102.1852106

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the rank biased overlap method.
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="rank_biased_overlap",
    method_params={'rbo_depth': 50},
    samp_neighbors=29,
    ranked=True,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)


In [None]:
### MEAN Z SCORES COUNTS ###
### average of gene-wise Z scores

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the mean_z method (note ranked=False here).
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="mean_z",
    method_params={},
    samp_neighbors=29,
    ranked=False,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)

In [None]:
### MEDIAN RANKS ###

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the median_score method on ranked values.
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="median_score",
    method_params={},
    samp_neighbors=29,
    ranked=True,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)


In [None]:
### ROBUST STANDARDIZED COUNTS ###

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the robust_std method (note ranked=False here).
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="robust_std",
    method_params={},
    samp_neighbors=29,
    ranked=False,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)

In [None]:
### AVERAGE COUNTS ###

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the average_score method.
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="average_score",
    method_params={},
    samp_neighbors=29,
    ranked=True,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)


In [None]:
### SUMMED UP ###

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the summed_up method.
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="summed_up",
    method_params={},
    samp_neighbors=29,
    ranked=True,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)

In [None]:
### experimental!! ###
### ssgsea scoring

# Delete the scores from the previous run. errors='ignore' skips any column
# that is not present, so this cell can be re-run (or run out of order)
# without raising a KeyError.
q.obs.drop(
    columns=[*gene_set_names, 'gssnng_groupby'],
    errors='ignore',
    inplace=True,
)

# Rescore every gene set with the (experimental) ssgsea method.
score_cells.with_gene_sets(
    adata=q,
    gene_set_file=gene_set_file,
    groupby="louvain",
    smooth_mode='connectivity',
    recompute_neighbors=0,
    score_method="ssgsea",
    method_params={'omega': 4},
    samp_neighbors=29,
    ranked=True,
    cores=6,
)

# Visualize the new scores on the UMAP, next to the louvain labels.
# There appears to be a bug with showing the color bar on Google Colab,
# so it is turned off here with colorbar_loc=None.
sc.pl.umap(q, color=['T.cells.CD8.up', 'B.cells.naive.up', 'louvain'], wspace=0.1, colorbar_loc=None)
