<font size="+3.8">Scanpy single-cell pre-processing</font>  
<font size="+1.5"></font>  

Aim: Preprocess the mouse brain single-cell data from Ximerakis et al. 2019, Nature Neuroscience.

In [None]:
from datetime import date
date.today().strftime('%d/%m/%Y')

In [None]:
import getpass
import os

# Record who ran the notebook. getpass.getuser() is used instead of
# os.getlogin() because the latter raises OSError when the kernel has no
# controlling terminal (e.g. a notebook server started as a service).
getpass.getuser()

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import platform
import random
import seaborn as sns

In [None]:
import utils

In [None]:
# Name of the active conda environment. CONDA_DEFAULT_ENV only exists inside an
# activated conda env, so fall back to a placeholder instead of raising KeyError.
conda_env = os.environ.get('CONDA_DEFAULT_ENV', '<no conda env active>')
conda_env

In [None]:
platform.platform()

In [None]:
sc.settings.verbosity = 3

In [None]:
main_dir='/run/user/1000/gvfs/smb-share:server=138.245.4.35,share=bd-dichgans/SF' # Linux

In [None]:
# Windows only: switch to the UNC path of the share. The guard keeps the Linux
# mount point from the previous cell intact when the notebook is run top to
# bottom on Linux; the raw string avoids the invalid "\i", "\B" and "\S" escapes.
if platform.system() == "Windows":
    main_dir = r'\\isdsynnas.srv.med.uni-muenchen.de\BD-Dichgans\SF' # Win

In [None]:
dataset_name = "Ximerakis2019"
organism = "Mouse"

# Load data

Downloaded from: https://singlecell.broadinstitute.org/single_cell/study/SCP263/aging-mouse-brain (accessed 06/03/2023)

In [None]:
# Build the path to the expression table first, then read it; the matrix is
# transposed right after loading.
expression_file = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Ximerakis2019",
    "expression_Aging_mouse_brain_portal_data_updated.txt",
)
ad = sc.read_text(expression_file).T
ad

In [None]:
ad.obs.head()

In [None]:
ad.var_names

In [None]:
# add metadata
meta_file = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Ximerakis2019",
    "meta_Aging_mouse_brain_portal_data.txt",
)
# Tab-separated table; the file row at index 1 (directly below the header) is skipped.
meta = pd.read_csv(meta_file, sep="\t", skiprows=[1])
meta.head()

In [None]:
# The metadata rows must line up one-to-one, and in the same order, with the
# cells of the expression matrix.
assert len(meta) == ad.n_obs, "metadata and expression matrix differ in number of cells"
assert (ad.obs_names == meta["NAME"].to_numpy()).all(), "metadata rows are not in the same order as the cells"
# Index the metadata by cell name before attaching it: a frame with a default
# RangeIndex would replace the cell names in ad.obs_names with "0", "1", ...
# The NAME column is kept (drop=False) because later cells read ad.obs['NAME'];
# the index name is cleared so it does not collide with that column.
ad.obs = meta.set_index("NAME", drop=False).rename_axis(None)
ad

In [None]:
# Number of cells per cell class (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["cell_class"].value_counts()

In [None]:
# Number of cells per cell type (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["cell_type"].value_counts()

In [None]:
# Expose the "all_cells_by_age" annotation under the shorter column name "age".
ad.obs["age"] = ad.obs["all_cells_by_age"]
# Number of cells per age group (Series.value_counts replaces the deprecated pd.value_counts)
ad.obs["age"].value_counts()

In [None]:
pd.crosstab(ad.obs["cell_type"], ad.obs["cell_class"])

In [None]:
# also see website

In [None]:
# Coarse cluster label -> set of fine-grained cell_type codes that belong to it.
new_names = {
    'Endothelial cells': {'EC'},
    'Oligos': {'OLG'},
    'Olfactory ensheathing glia': {'OEG'},
    'Astrocytes': {'ARP', 'ASC'},
    'Pericytes': {'PC'},
    'SMCs': {'VSMC'},
    'Neurons': {'NRP', 'NEUR_immature', 'NEUR_mature', 'NendC'},
    'OPCs': {'OPC'},
    'VLMCs': {'VLMC', 'ABC'},
    'Microglia/Macrophages': {'MG', 'MAC'},
    'Neural stem cells': {'NSC'},
    'Ependymal cells': {'EPC'},
    'Hypendymal cells': {'HypEPC', 'TNC'},
    'Choroid plexus epithelial cells': {'CPC'},
    'Immune_Other': {'MNC', 'DC', 'NEUT'},
    'Hemoglobin-expressing vascular cells': {'Hb_VC'},
}
# Invert the mapping: cell_type code -> coarse label.
result = {
    code: label
    for label, codes in new_names.items()
    for code in codes
}
# Order the lookup alphabetically by code so the printed dict is stable.
new_cluster_names = dict(sorted(result.items()))
print(new_cluster_names)

In [None]:
# Translate every fine-grained cell_type code into its coarse label and store
# the result as a categorical column.
coarse_labels = ad.obs['cell_type'].map(new_cluster_names)
ad.obs['clusters'] = coarse_labels.astype('category')

In [None]:
# Number of cells per coarse cluster (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["clusters"].value_counts()

In [None]:
# Marker genes used to sanity-check the coarse cluster labels.
marker_genes = [
    'Cldn5', "Aqp4", "Mbp", 'Foxj1',
    'Pdgfrb', 'Vtn', 'Kcnj8', 'Rgs5', 'Atp13a5',
    'Tagln', 'Acta2', 'Myh11',
]
sc.pl.dotplot(ad, var_names=marker_genes, groupby="clusters", swap_axes=False);

The last three columns (Tagln, Acta2, Myh11) are SMC markers.

Check if data is normalized:

In [None]:
ad.layers

In [None]:
# Histogram of the per-row sums of ad.X (one value per cell), used as a quick
# check of whether the matrix has already been normalized.
sns.histplot(ad.X.sum(1), kde=False)
plt.show()

In [None]:
# Show the expression distribution of 100 randomly chosen genes (across all cells).
# A dedicated, seeded generator makes the gene selection reproducible on re-run
# without touching the global `random` state.
N_RANDOM_GENES = 100
gene_sampler = random.Random(0)
random_genes = gene_sampler.sample(range(ad.n_vars), min(N_RANDOM_GENES, ad.n_vars))
adata_sub = ad[:, random_genes]
# Plain ndarray instead of np.matrix (no longer recommended by NumPy).
exp = pd.DataFrame(np.asarray(adata_sub.X))
# One KDE curve per gene, all drawn in the same colour.
pl1 = sns.displot(data=pd.melt(exp), x='value', height=4, hue='variable', kind="kde", warn_singular=False, legend=False, palette=['#086da6'] * len(random_genes), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7), ylim=(0, 0.007));
plt.show()

In [None]:
sns.set(rc={'figure.figsize':(4,4)})
# Pooled histogram of the sampled genes' expression values. No hue variable is
# mapped, so the former `palette` / `legend` arguments had no effect and are omitted.
pl = sns.histplot(data=pd.melt(exp), x='value', binwidth=0.5)
pl.set(xlim=(0, 10));
sns.set_style("ticks")  # set after drawing: applies to figures created from here on
plt.show()

In [None]:
# Inspect the values of the sampled genes as a plain ndarray (np.matrix is no longer recommended by NumPy)
np.asarray(adata_sub.X)

The data appear to be already normalized and log-transformed.

In [None]:
# Keep a copy of the matrix as loaded (judged above to be normalized and
# log-transformed, but unscaled) in its own layer; it can be restored later
# via ad.X = ad.layers["normalized"].
ad.layers["normalized"] = ad.X.copy()

In [None]:
ad

In [None]:
ad.layers

In [None]:
# Standard scanpy embedding / clustering chain, every step with default parameters.
sc.tl.pca(ad)  # principal component analysis
sc.pp.neighbors(ad)  # neighborhood graph
sc.tl.umap(ad)  # UMAP embedding (plotted in the following cells)
sc.tl.leiden(ad)  # Leiden clustering

In [None]:
sc.pl.umap(ad, color=['clusters'])

In [None]:
sc.pl.umap(ad, color=['Foxf2'])

In [None]:
adata=ad
del ad

# Focus on: Foxf2

In [None]:
# NOTE: this binds a second name to the same AnnData object (no copy is made).
# It protects against `adata` being re-bound to a subset later on, but in-place
# changes made through `adata` are also visible through `adata_backup`.
adata_backup = adata

In [None]:
# drop aged samples
# adata = adata[adata.obs['age'].isin(['2-3mo'])]

In [None]:
gene="Foxf2"

## Plot

In [None]:
sc.pl.matrixplot(adata, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
utils.summarize_gene_expression(adata = adata, gene = gene, groupby = "clusters", 
                                study_name = dataset_name, organism = organism,
                                export = True, output_dir = os.path.join(main_dir, "P06_Foxf2_per_celltype", "Foxf2_summarized")
                               )

# Focus on: Other genes

In [None]:
target_genes = ["Foxo1", "Tek", "Nos3", "Htra1", "Egfl8", "Flt1", "Kdr", "Ptprb", "Nrp1", "Nrp2", "Efnb2", "Itgb1", "Itga6", "Angpt2", "Cdh5", "Cldn5", "Ocln", "Ctnnb1"]

In [None]:
other_genes_results = {
    gene: utils.summarize_gene_expression(adata, gene, study_name = dataset_name, organism = organism, groupby = "clusters",
                                          output_dir=os.path.join(main_dir, "P06_Foxf2_per_celltype", "Other_genes_summarized"), export=True
                                         ) for gene in target_genes
}

In [None]:
# some plots

In [None]:
sc.pl.matrixplot(adata, [target_genes[0]], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[0]], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.matrixplot(adata, [target_genes[1]], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[1]], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

# Focus on: Aging

In [None]:
adata = adata_backup

In [None]:
# Number of cells per age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata.obs["age"].value_counts()

## Foxf2 by age group

In [None]:
gene=["Foxf2","Foxo1","Tek","Nos3"]

### All cell types

In [None]:
sc.pl.dotplot(adata, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="All cell types")

### ECs only

In [None]:
# ECs only
adata_ecs = adata[adata.obs['clusters'].isin(['Endothelial cells'])]
adata_ecs

In [None]:
# Endothelial cells per age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata_ecs.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_ecs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="ECs")

### PCs only

In [None]:
# Pericyte subset and its number of cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
adata_pcs = adata[adata.obs['clusters'].isin(['Pericytes'])]
adata_pcs.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_pcs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="PCs")

### SMCs only

In [None]:
adata_pcs = adata[adata.obs['clusters'].isin(['SMCs'])]
pd.value_counts(adata_pcs.obs["age"])

In [None]:
sc.pl.dotplot(adata_pcs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="SMCs")

### Neurons only

In [None]:
# Neuron subset and its number of cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
adata_neur = adata[adata.obs['clusters'].isin(['Neurons'])]
adata_neur.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_neur, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Neurons")

### Astrocytes only

In [None]:
# Astrocyte subset and its number of cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
adata_astro = adata[adata.obs['clusters'].isin(['Astrocytes'])]
adata_astro.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_astro, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Astrocytes")

### Oligos only

In [None]:
# Oligodendrocyte subset and its number of cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
adata_olig = adata[adata.obs['clusters'].isin(['Oligos'])]
adata_olig.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_olig, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Oligos")

### Microglia only

In [None]:
# Microglia/macrophage subset and its number of cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
adata_mg = adata[adata.obs['clusters'].isin(['Microglia/Macrophages'])]
adata_mg.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_mg, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Microglia/Macrophages")

## Correlate gene expression (Foxf2 and Foxo1)

### ECs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adata_ecs

In [None]:
adat = adata_ecs
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
import scipy.stats  # load the submodule explicitly: `import scipy` alone does not guarantee that `sp.stats` is available

# Pearson correlation between the two genes across the rows of `subs`.
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

Exclude dropouts (cells with 0 expression of both genes)

In [None]:
# Boolean mask of endothelial cells in which at least one of the two genes is
# detected (> 0); cells with zero expression of both are left out.
foxf2_detected = adata_ecs[:, "Foxf2"].X > 0
foxo1_detected = adata_ecs[:, "Foxo1"].X > 0
coex = foxf2_detected | foxo1_detected

In [None]:
adata_ecs_above0 = adata_ecs[coex]
adata_ecs_above0

In [None]:
# Remaining endothelial cells per age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata_ecs_above0.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_ecs_above0, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="ECs")

In [None]:
adat = adata_ecs_above0
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adat = adata
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

Exclude dropouts (cells with 0 expression of both genes)

In [None]:
# Boolean mask of cells in which at least one of the two genes is detected
# (> 0); cells with zero expression of both are left out.
foxf2_detected = adat[:, "Foxf2"].X > 0
foxo1_detected = adat[:, "Foxo1"].X > 0
coex = foxf2_detected | foxo1_detected

In [None]:
adata_above0 = adat[coex]
adata_above0

In [None]:
# Remaining cells per age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata_above0.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_above0, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized")

In [None]:
adat = adata_above0
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

## Pseudobulk analysis (decoupler)

Rationale: correlation at the single-cell level is biased by the large number of dropouts, so expression is aggregated per sample first.

In [None]:
import decoupler as dc

In [None]:
adata

In [None]:
# Derive a sample identifier from the cell name: the first run of digits that
# directly follows an underscore. expand=False makes str.extract return a
# Series (one value per cell) rather than a one-column DataFrame.
adata.obs["sample"] = adata.obs['NAME'].str.extract('_([0-9]+)_?', expand=False)
# Number of cells per sample (Series.value_counts replaces the deprecated pd.value_counts)
adata.obs["sample"].value_counts()

In [None]:
pd.crosstab(adata.obs["sample"], adata.obs["age"]).T

In [None]:
# Aggregate cells into one pseudobulk profile per sample (groups_col=None, i.e.
# no split by cell type), reading values from the "normalized" layer.
# NOTE(review): that layer was judged earlier in this notebook to hold
# log-normalized values; confirm that the aggregation mode of the installed
# decoupler version is appropriate for non-count data.
padata = dc.get_pseudobulk(adata, sample_col='sample', groups_col=None, layer='normalized', min_prop=0.01, min_smpls=0)

In [None]:
padata

### Correlate gene expression (Foxf2 and Foxo1)

#### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
padata

In [None]:
adat = padata
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
import scipy.stats  # load the submodule explicitly: `import scipy` alone does not guarantee that `sp.stats` is available

# Pearson correlation between the two genes across the pseudobulk rows of `subs`.
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

#### ECs

In [None]:
# Aggregate cells into one pseudobulk profile per sample and cluster
# (groups_col='clusters'), reading values from the "normalized" layer. This
# re-binds `padata`, replacing the per-sample pseudobulk created earlier.
# NOTE(review): that layer was judged earlier in this notebook to hold
# log-normalized values; confirm that the aggregation mode of the installed
# decoupler version is appropriate for non-count data.
padata = dc.get_pseudobulk(adata, sample_col='sample', groups_col='clusters', layer='normalized', min_prop=0.2, min_smpls=3)

In [None]:
padata

In [None]:
# ECs only
padata_ecs = padata[padata.obs['clusters'].isin(['Endothelial cells'])]
padata_ecs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adat = padata_ecs
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
import scipy.stats  # load the submodule explicitly: `import scipy` alone does not guarantee that `sp.stats` is available

# Pearson correlation between the two genes across the EC pseudobulk rows of `subs`.
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

## Denoising (MAGIC)

In [None]:
import magic
import scprep

### Correlate gene expression (Foxf2 and Foxo1)

#### ECs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adata_EC = adata[adata.obs.clusters == "Endothelial cells"]

In [None]:
adata_EC

In [None]:
#adata_EC = adata_EC[adata_EC.obs.age == "21-22mo"]

In [None]:
adata_EC

In [None]:
matrix = pd.DataFrame(adata_EC.X)
matrix.columns = adata_EC.var.index.tolist()

In [None]:
matrix.head()

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=1500)

Note: Skipped normalization as data is already log-normalized

In [None]:
adata_EC.layers

##### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
magic_op = magic.MAGIC()

##### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
emt_magic = magic_op.fit_transform(matrix, genes=['Foxf2', 'Foxo1', 'Nos3'])

##### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

##### All ECs

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0.2,0.2), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

##### 21-22mo ECs only

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0.2,0.2), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

##### 2-3mo ECs only

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0.2,0.2), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

#### PCs

In [None]:
# NOTE: the name `adata_EC` is reused here but now holds the pericyte subset;
# the following cells of this section read it under that name.
adata_EC = adata[adata.obs.clusters == "Pericytes"]

In [None]:
matrix = pd.DataFrame(adata_EC.X)
matrix.columns = adata_EC.var.index.tolist()

In [None]:
matrix.head()

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=1500)

Note: Skipped normalization as data is already log-normalized

In [None]:
adata_EC.layers

##### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
magic_op = magic.MAGIC()

##### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
emt_magic = magic_op.fit_transform(matrix, genes=['Foxf2', 'Foxo1', 'Nos3'])

##### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0.2,0.2), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

#### SMCs

In [None]:
# NOTE: the name `adata_EC` is reused here but now holds the SMC subset;
# the following cells of this section read it under that name.
adata_EC = adata[adata.obs.clusters == "SMCs"]

In [None]:
matrix = pd.DataFrame(adata_EC.X)
matrix.columns = adata_EC.var.index.tolist()

In [None]:
matrix.head()

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=1500)

Note: Skipped normalization as data is already log-normalized

In [None]:
adata_EC.layers

##### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
magic_op = magic.MAGIC()

##### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
emt_magic = magic_op.fit_transform(matrix, genes=['Foxf2', 'Foxo1', 'Nos3'])

##### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0.2,0.2), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

#### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
matrix = pd.DataFrame(adata.X)
matrix.columns = adata.var.index.tolist()

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=1500)

Note: Skipped normalization as data is already log-normalized

In [None]:
adata.layers

##### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
magic_op = magic.MAGIC()

##### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
emt_magic = magic_op.fit_transform(matrix, genes=['Foxf2', 'Foxo1', 'Nos3'])

##### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[['Foxf2','Foxo1']], rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))

scprep.plot.scatter(x=matrix['Foxf2'], y=matrix['Foxo1'], c=matrix['Nos3'],  ax=ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic['Foxf2'], y=emt_magic['Foxo1'], c=emt_magic['Nos3'], ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0,0), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

# Session Info

In [None]:
sc.logging.print_versions()