In [None]:
import scanpy as sc

# Scanpy session configuration: verbosity level 3, print scanpy's header,
# and draw figures at 80 dpi on a white background.
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor="white")

# GeneVector classes used in the cells below.
from genevector.data import GeneVectorDataset
from genevector.model import GeneVector
from genevector.embedding import GeneEmbedding, CellEmbedding

In [None]:
# Read the PBMC dataset from disk; the bare name on the last line makes the
# loaded object's summary the cell output.
pbmc_path = "PBMC.h5ad"
adata = sc.read(pbmc_path)
adata

# Process Data

In [None]:
# Wrap the loaded `adata` in a GeneVectorDataset. This `dataset` object is the
# input passed to GeneVector, GeneEmbedding and CellEmbedding in later cells.
dataset = GeneVectorDataset(adata)

In [None]:
# Configure the GeneVector model on `dataset` with an embedding dimension of
# 100 and a batch size of 10,000. "pbmc.vec" is the same path that the
# GeneEmbedding cell loads after training.
cmps = GeneVector(
    dataset,
    output_file="pbmc.vec",
    emb_dimension=100,
    batch_size=10_000,
)

# Setup Model

# Train Network

In [None]:
# Train the model (the first argument, 1000, is presumably the number of
# training epochs -- TODO confirm), then load the gene embedding from
# "pbmc.vec" and print the first 30 rows of the IFIT1 similarity result.
cmps.train(1000, threshold=0.0)
embed = GeneEmbedding("pbmc.vec", dataset, vector="average")
ifit1_similarities = embed.compute_similarities("IFIT1")
print(ifit1_similarities[:30])

In [None]:

# Build the cell embedding from the dataset and the trained gene embedding,
# then plot its UMAP coloured by sample and by cell type.
# This figure is drawn BEFORE batch correction (cembed.batch_correct is only
# called in the following cell), so the sample panel is titled "Uncorrected"
# to keep it distinguishable from the corrected figure.
cembed = CellEmbedding(dataset, embed)
adata = cembed.get_adata()
sc.pl.umap(
    adata,
    color=["sample", "celltype"],
    title=["Uncorrected", "Cell Type"],
    palette="Dark2",
    wspace=0.3,
    add_outline=True,
    size=15,
)

In [None]:
import matplotlib

# Keep text as real text (not outlined paths) in saved SVG figures.
matplotlib.rcParams["svg.fonttype"] = "none"

# Batch-correct on the "sample" column with "control" as the reference,
# refresh `adata` from the corrected embedding, and save the UMAP figure.
cembed.batch_correct(column="sample", reference="control")
adata = cembed.get_adata()
sc.pl.umap(
    adata,
    color=["sample", "celltype"],
    title=["Corrected", "Cell Type"],
    palette="Dark2",
    wspace=0.3,
    add_outline=True,
    size=15,
    save="pca_corrected.svg",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(font_scale=1.0)

# First 20 rows of the CD8A similarity table: printed as text, then drawn as
# a bar plot with "Gene" on the y-axis and "Similarity" on the x-axis.
df = embed.compute_similarities("CD8A").head(20)
print(df)
fig, ax = plt.subplots(figsize=(3, 6))
sns.barplot(data=df, x="Similarity", y="Gene", palette="Dark2", ax=ax)

In [None]:
# Fetch the gene-level data object from the gene embedding and derive
# metagenes from it. Later cells iterate `metagenes` with .items(), treating
# it as a mapping from a cluster identifier to a collection of gene names.
gdata = embed.get_adata()
metagenes = embed.get_metagenes(gdata)

In [None]:
# Normalize total counts and log1p-transform `adata`, then score every
# metagene on it.
# NOTE(review): with scanpy's default arguments both preprocessing calls
# modify `adata` in place, so re-running this cell would transform values
# that were already transformed -- confirm before re-executing it.
# The "<cluster>_SCORE" key plotted in a later cell is presumably created by
# score_metagenes -- TODO confirm.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
embed.score_metagenes(adata, metagenes)

In [None]:
# Find the first metagene that contains IFIT1, print it and plot it.
# `clust` and `genes` are deliberately left bound to that metagene because
# the following cells read them.
for clust, genes in metagenes.items():
    if "IFIT1" in genes:
        print(clust, genes)
        embed.plot_metagene(gdata, mg=clust, title="Interferon MG")
        break
else:
    # Without this guard the loop would finish with `clust`/`genes` bound to
    # the LAST metagene (or unbound when `metagenes` is empty), and the later
    # cells would silently plot an unrelated metagene as the ISG signature.
    raise ValueError("No metagene contains IFIT1; cannot select the interferon metagene.")

In [None]:
# UMAP coloured by the score column of the metagene selected in the
# previous cell (`clust`).
score_key = f"{clust}_SCORE"
sc.pl.umap(
    adata,
    color=[score_key],
    title="ISG Signature",
    add_outline=True,
    size=10,
)

# Annotate Cell Types with Pseudo-probabilities

In [None]:
# Keep only the selected metagene's genes that are present in adata.var,
# preserving their order, then plot them grouped by detailed cell type.
# The membership set is built once; the previous form rebuilt the full
# gene list with .tolist() for every gene tested in the comprehension.
measured_genes = set(adata.var.index.tolist())
genes = [x for x in genes if x in measured_genes]
sc.pl.matrixplot(adata, genes, groupby="detailed_celltype")

# Plot All Metagenes

In [None]:
# Shrink seaborn's font scale to 0.25, then draw the scores of all metagenes
# against the "detailed_celltype" column.
# NOTE(review): sns.set changes global plotting defaults, so the 0.25 font
# scale presumably stays in effect for every plot drawn after this cell --
# confirm that is intended.
sns.set(font_scale=0.25)
embed.plot_metagenes_scores(adata,metagenes,"detailed_celltype")

# Find Similar Genes to Cell Type

In [None]:
import pandas

# Ask the cell embedding for 5 predictive genes per "celltype" label and
# display them as a table, transposed so each key of `markers` is a row.
markers = cembed.get_predictive_genes(adata, "celltype", n_genes=5)
pandas.DataFrame.from_dict(markers).transpose()

In [None]:
# Compute phenotype probabilities from `markers`. The second marker
# dictionary is passed empty, and `adata` is rebound to the returned object.
nmarkers = {}
adata = cembed.phenotype_probability(adata, markers, nmarkers)

In [None]:
# UMAP coloured by the "genevector" key -- presumably the annotation added by
# phenotype_probability in the previous cell (TODO confirm).
sc.pl.umap(adata,color="genevector")