# Stage 3

Labelling

In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import os
import glob
import re
from joblib import Parallel, delayed
from functools import partial
import scipy.sparse
import seaborn as sns
import scanpy_gpu_funcs as rsf
import cudf
import cupy as cp
from cuml.decomposition import PCA
from scipy.sparse import issparse
from SCTransform import SCTransform
from tqdm import tqdm
import pickle

import matplotlib.pyplot as plt
from matplotlib import rcParams
sc.set_figure_params(dpi= 100, dpi_save = 300)
rcParams['figure.figsize'] = 5,5

from sklearn.neighbors import LocalOutlierFactor

os.chdir('/active/paper/')

# Load full adata

In [None]:
with open('input/adata/midbrain/adata.pickle', 'rb') as f:
    adata = pickle.load(f)

# Set seed

In [1]:
import random

def seed_everything(seed=42):
    """"
    Seed everything.
    """   
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
seed_everything(12)

NameError: name 'os' is not defined

In [None]:
adata

# Create cell type label

In [None]:
adata.obs['cell_type'] = 'unknown'

# Level 1

In [None]:
set(['_'.join(x.split('_')[:2]) for x in adata.obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3'), 'comparison'] = 'cluster_3'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata.layers['count'] = adata.X.copy()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
adata.layers['lognorm'] = adata.X.copy()
adata.X = adata.layers['count'].copy()

sc.tl.rank_genes_groups(adata, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # reference='cluster_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_1')

In [None]:

fig, axs = plt.subplots(nrows=adata.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 25))
for row, cluster in enumerate(adata.obs['comparison'].unique()):
    for col, section in enumerate(adata.obs.sample_id.unique()):
        sc.pl.spatial(adata[adata.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Cartpt neurons

In [None]:
df = sc.get.rank_genes_groups_df(adata, 
                           key = 'level_1', 
                           group = 'cluster_0')

df.loc[df['pvals_adj'] < 0.01].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Cartpt neurons (Edinger-Westphal nucleus)' 

### Dopamine neurons

In [None]:
sc.get.rank_genes_groups_df(adata, 
                           key = 'level_1', 
                           group = 'cluster_1').iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic' 

### Snap25+

In [None]:
df = sc.get.rank_genes_groups_df(adata, 
                           key = 'level_1', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25' 

### Plp1+

In [None]:
df = sc.get.rank_genes_groups_df(adata, 
                           key = 'level_1', 
                           group = 'cluster_3')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_3'), 'cell_type'] = 'Plp1' 

# Level 2: Snap25 cells

In [None]:
set(['_'.join(x.split('_')[:3]) for x in adata[adata.obs['cell_type'] == 'Snap25'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3'), 'comparison'] = 'cluster_3'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_2_Snap25')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 25))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### GABAergic, Sst+, Npy+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Snap25', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_GABAergic-Sst-Npy'

### Thalamic 

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Snap25', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Thalamic' 

### Central

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Snap25', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Central' 

### Cortical

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Snap25', 
                           group = 'cluster_3')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_3'), 'cell_type'] = 'Snap25_Cortical' 

# Level 2: Plp1 cells

In [None]:
set(['_'.join(x.split('_')[:3]) for x in adata[adata.obs['cell_type'] == 'Plp1'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_2_Plp1')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 25))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Erythroid

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Plp1', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Erythroid' 

### Astrocyte-like (with ageing proliferation)

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Plp1', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like' 

### Plp1 +++

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Plp1', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:10]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Plp1_Plp1+++' 

# Level 2: Dopaminergic

In [None]:
set(['_'.join(x.split('_')[:3]) for x in adata[adata.obs['cell_type'] == 'Dopaminergic'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_2_Dopaminergic')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Lower Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Dopaminergic', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Dopaminergic_Lower-Complexity' 

### Higher-Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_2_Dopaminergic', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic_Higher-Complexity' 

# Level 3: Dopaminergic_Higher-Complexity

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Dopaminergic_Higher-Complexity')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Plp1+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Dopaminergic_Higher-Complexity', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1+' 

### Plp1-

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Dopaminergic_Higher-Complexity', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-' 

# Level 4: Dopaminergic_Higher-Complexity_Plp1-

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity_Plp1-'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Dopaminergic_Higher-Complexity_Plp1-')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Malat1+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Dopaminergic_Higher-Complexity_Plp1-', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1+' 

### Malat1-

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Dopaminergic_Higher-Complexity_Plp1-', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1-' 

# Level 5: Dopaminergic_Higher-Complexity_Plp1-_Malat1-

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity_Plp1-_Malat1-'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1-')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### VTA

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1-', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1-_VTA' 

### SN

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1-', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1-_SN' 

# Level 5: Dopaminergic_Higher-Complexity_Plp1-_Malat1+

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity_Plp1-_Malat1+'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_0_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_1_0_0_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1+')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### VTA

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1+', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1+_VTA' 

### SN

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Dopaminergic_Higher-Complexity_Plp1-_Malat1+', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1+_SN' 

# Level 3: Snap25_Cortical

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Snap25_Cortical')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Camk1d-Il31ra

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Snap25_Cortical', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Camk1d-Il31ra' 

### Lower Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Snap25_Cortical', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Lower-Complexity' 

### Higher Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Snap25_Cortical', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity' 

# Level 3: Snap25_Central

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Snap25_Central')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Plp1+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Snap25_Central', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1+' 

### Plp1-

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Snap25_Central', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-' 

# Level 3: Plp1_Astrocyte-like

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Plp1_Astrocyte-like')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Ageing glia

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Astrocyte-like', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing' 

### Cathepsin-Tyrobp

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Astrocyte-like', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Cathepsin-Tyrobp' 

# Level 4: Plp1_Astrocyte-like_Cathepsin-Tyrobp

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like_Cathepsin-Tyrobp'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_1_FINAL'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_1_1'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Plp1_Astrocyte-like_Cathepsin-Tyrobp')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Lower Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Astrocyte-like_Cathepsin-Tyrobp', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Cathepsin-Tyrobp_Lower-Complexity' 

### Plp1+ Higher Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Astrocyte-like_Cathepsin-Tyrobp', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Cathepsin-Tyrobp_Higher-Complexity-Plp1+' 

# Level 3: Plp1_Plp1+++

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Plp1_Plp1+++'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        # groups=['cluster_1_1_0', 'cluster_1_1_1'],
                        # reference='cluster_1_1_0', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Plp1_Plp1+++')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Olig-Neurons

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Plp1+++', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Plp1+++_Olig-Neurons' 

### Lower-Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Plp1+++', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Plp1+++_Lower-Complexity' 

### Cathepsin-Tyrobp

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Plp1+++', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Plp1_Plp1+++_Cathepsin-Tyrobp' 

# Level 4: Plp1_Plp1+++_Lower-Complexity

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Plp1_Plp1+++_Lower-Complexity'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_2_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Plp1_Plp1+++_Lower-Complexity')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

### Ageing

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Plp1+++_Lower-Complexity', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Plp1+++_Lower-Complexity', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Plp1+++_Lower-Complexity_Ageing' 

### Non-Ageing

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Plp1+++_Lower-Complexity', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Plp1+++_Lower-Complexity', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Plp1+++_Lower-Complexity_Non-Ageing' 

# Level 3: Plp1_Erythroid

In [None]:
set(['_'.join(x.split('_')[:4]) for x in adata[adata.obs['cell_type'] == 'Plp1_Erythroid'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_3_Plp1_Erythroid')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Mbp'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Snap25+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Erythroid', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Erythroid_Snap25+' 

### Erythroid

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_3_Plp1_Erythroid', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Erythroid_Erythroid' 

# Level 4: Snap25_Central_Plp1-

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_1'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Snap25_Central_Plp1-')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Mbp'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Cortical

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Central_Plp1-', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Cortical' 

### Mbp+ Plp1+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Central_Plp1-', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+' 

### Pontine Gray/Middle Cerebellar Peduncle

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Central_Plp1-', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Central_Plp1-_PontineGray/MCP' 

# Level 5: Snap25_Central_Plp1-_Mbp-Plp+

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Snap25_Central_Plp1-_Mbp-Plp+')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Malat1+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Central_Plp1-_Mbp-Plp+', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+' 

### Malat1-

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Central_Plp1-_Mbp-Plp+', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-' 

# Level 6: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+

In [None]:
set(['_'.join(x.split('_')[:7]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Meg3'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Astrocyte-associated

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+', 
                           group = 'cluster_0').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_Astrocyte-associated' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General' 

# Level 7: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General

In [None]:
set(['_'.join(x.split('_')[:8]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Cntn5+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General', 
                           group = 'cluster_0').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_Cntn5+' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General' 

# Level 8: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General

In [None]:
set(['_'.join(x.split('_')[:9]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Plp1+++

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General', 
                           group = 'cluster_0').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_Plp1+++' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General' 

# Level 9: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General

In [None]:
set(['_'.join(x.split('_')[:10]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pvalb'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Pvalb+ Ventral

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General', 
                           group = 'cluster_0').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_Ventral-Pvalb+' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General' 

# Level 10: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General

In [None]:
set(['_'.join(x.split('_')[:11]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_1'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Gad1', 'Cntnap2'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Superior Colliculus GABAergic

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_SupColl-GABAergic' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General' 

### Cntnap2+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_2').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_Cntnap2+' 

# Level 11: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General

In [None]:
set(['_'.join(x.split('_')[:12]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Nrxn3'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Nrxn3+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_SupColl-GABAergic' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General' 

# Level 12: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General

In [None]:
set(['_'.join(x.split('_')[:13]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Grik1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Grik1+++

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_Grik1+++' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General' 

# Level 13: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General

In [None]:
set(['_'.join(x.split('_')[:14]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_0_4'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_13_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Grik1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Malat1+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_13_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_13_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1+' 

### Malat1-

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_13_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_13_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-' 

# Level 14: Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-

In [None]:
set(['_'.join(x.split('_')[:15]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_0_4_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_0_1_1_1_1_0_1_0_4_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_14_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Grik1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

### Sample-Dependent-1

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_14_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_14_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-_Sample-Dependent-1' 

### Sample-Dependent-2

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_14_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_14_Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1+_General_General_General_General_General_General_General_Malat1-_Sample-Dependent-2' 

# Level 6: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-

In [None]:
set(['_'.join(x.split('_')[:7]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Meg3'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Pvalb+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General' 

# Level 7: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+

In [None]:
set(['_'.join(x.split('_')[:8]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Gad1', 'Sncg'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Inner

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+_Inner' 

### Outer-Sncg+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_Pvalb+_Outer-Sncg+' 

# Level 7: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General

In [None]:
set(['_'.join(x.split('_')[:8]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Gad1', 'Sncg'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Lower-Complexity

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Lower-Complexity' 

### Higher Complexity

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity' 

# Level 8: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity

In [None]:
set(['_'.join(x.split('_')[:9]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Cartpt

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_Cartpt' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General' 

# Level 9: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General

In [None]:
set(['_'.join(x.split('_')[:10]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pvalb'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Calca+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Calca+' 

### Sample-dependent 1

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1' 

### Sample-dependent 2

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_2').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2' 

# Level 10: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1

In [None]:
set(['_'.join(x.split('_')[:11]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_FINAL'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pvalb', 'Ahi1'], groupby='comparison')

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Sample-Dependent-1

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1' 

### Sample-Dependent-2

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-2' 

# Level 11: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1

In [None]:
set(['_'.join(x.split('_')[:12]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Thalamic & PAG

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_Thalamic-PAG' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_11_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General' 

# Level 12: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General

In [None]:
set(['_'.join(x.split('_')[:13]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_0_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_0_0_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1'], groupby='comparison')

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Dorsal (Technical)

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General_Dorsal' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_12_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-1_Sample-Dependent-1_General_General' 

# Level 10: Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2

In [None]:
set(['_'.join(x.split('_')[:11]) for x in adata[adata.obs['cell_type'] == 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_2_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_2_1_0_1_1_1_1_2_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pvalb', 'Ahi1'], groupby='comparison')

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Pvalb+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2', 
                           group = 'cluster_0').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2_Pvalb+' 

### Ahi+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_10_Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Central_Plp1-_Mbp-Plp+_Malat1-_General_Higher-Complexity_General_Sample-Dependent-2_Ahi+' 

# Level 4: Snap25_Cortical_Higher-Complexity

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Snap25_Cortical_Higher-Complexity')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 15))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Inner Cortex

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Cortical_Higher-Complexity', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Inner-Cortex' 

### Outer Cortex

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Cortical_Higher-Complexity', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Outer-Cortex' 

### Hippocampus

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Cortical_Higher-Complexity', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus' 

# Level 5: Snap25_Cortical_Higher-Complexity_Hippocampus

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Hippocampus'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Snap25_Cortical_Higher-Complexity_Hippocampus')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pcp4', 'Neurod6'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### CA Fields

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Hippocampus', 
                           group = 'cluster_0')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Hippocampus', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields' 

### Prox1+ DG

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Hippocampus', 
                           group = 'cluster_1').iloc[:20]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Hippocampus', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_DG-Prox1+' 

# Level 6: Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields

In [None]:
set(['_'.join(x.split('_')[:7]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_6_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pcp4', 'Neurod6'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### CA3

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields', 
                           group = 'cluster_0')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA3' 

### CA1-Subiculum-Amygdala

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields', 
                           group = 'cluster_1')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala' 

# Level 7: Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala

In [None]:
set(['_'.join(x.split('_')[:8]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_7_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pcp4', 'Neurod6'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Whole Brain - DCC+++

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala', 
                           group = 'cluster_0')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_Whole-Brain-DCC+' 

### General

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala', 
                           group = 'cluster_1')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General' 

# Level 8: Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General

In [None]:
set(['_'.join(x.split('_')[:9]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_1'), 'comparison'] = 'cluster_3'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_3'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pcp4', 'Neurod6'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 20))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Whole Brain - Lsamp+-Rbfox1+

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_0')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_Lsamp+-Rbfox1+' 

### Ventral Subiculum (in most samples)

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_1')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_Ventral-Subiculum-Sample-Dependent' 

### Dorsal Subiculum

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_2')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_Dorsal-Subiculum' 

### CA1

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_3')

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_8_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General', 
                           group = 'cluster_3')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_3'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_CA1' 

# Level 9: Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_CA1

In [None]:
set(['_'.join(x.split('_')[:10]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_CA1'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_1_0_1_1_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_9_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General_CA1')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1', 'Malat1', 'Pcp4', 'Neurod6'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

In [None]:
adata_comparison.obs.groupby(['mouse_id', 'comparison']).size()

### Sample Consistent

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General_CA1', 
                           group = 'cluster_0').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General_CA1', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_CA1_Sample-Consistent' 

### Sample Dependent

In [None]:
sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General_CA1', 
                           group = 'cluster_1').iloc[:10]

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_9_Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields-CA1-Subiculum-Amygdala_General_CA1', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Hippocampus_CA-Fields_CA1-Subiculum-Amygdala_General_CA1_Sample-Dependent' 

# Level 5: Snap25_Cortical_Higher-Complexity_Outer-Cortex

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Outer-Cortex'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_0_2'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_0_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Snap25_Cortical_Higher-Complexity_Outer-Cortex')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Ventral

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Outer-Cortex', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Outer-Cortex_Ventral' 

### General

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Snap25_Cortical_Higher-Complexity_Outer-Cortex', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Outer-Cortex_General' 

# Level 6: Snap25_Cortical_Higher-Complexity_Outer-Cortex_General

In [None]:
set(['_'.join(x.split('_')[:7]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Higher-Complexity_Outer-Cortex_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_0_2_2'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_2_0_2_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_6_Snap25_Cortical_Higher-Complexity_Outer-Cortex_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Dorsomedial

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Outer-Cortex_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Outer-Cortex_General_Dorsomedial' 

### General

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Snap25_Cortical_Higher-Complexity_Outer-Cortex_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Higher-Complexity_Outer-Cortex_General_General' 

# Level 4: Snap25_Cortical_Lower-Complexity

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Snap25_Cortical_Lower-Complexity'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_1_0'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_2_3_1_FINAL'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Snap25_Cortical_Lower-Complexity')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Ventral

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Cortical_Lower-Complexity', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Snap25_Cortical_Lower-Complexity_Ventral' 

### General

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Snap25_Cortical_Lower-Complexity', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Snap25_Cortical_Lower-Complexity_General' 

# Level 4: Plp1_Astrocyte-like_Ageing

In [None]:
set(['_'.join(x.split('_')[:5]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like_Ageing'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_4_Plp1_Astrocyte-like_Ageing')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Dura Mater

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Astrocyte-like_Ageing', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing-Dura-Mater' 

### Dura Mater

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_4_Plp1_Astrocyte-like_Ageing', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes' 

# Level 5: Plp1_Astrocyte-like_Ageing_Astrocytes

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like_Ageing_Astrocytes'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_5_Plp1_Astrocyte-like_Ageing_Astrocytes')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Dura Mater

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Plp1_Astrocyte-like_Ageing_Astrocytes', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing-Astrocytes_Dura-Mater' 

### General

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_5_Plp1_Astrocyte-like_Ageing_Astrocytes', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General' 

# Level 6: Plp1_Astrocyte-like_Ageing_Astrocytes_General

In [None]:
set(['_'.join(x.split('_')[:7]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like_Ageing_Astrocytes_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_6_Plp1_Astrocyte-like_Ageing_Astrocytes_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Dura Mater

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Plp1_Astrocyte-like_Ageing_Astrocytes_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_Dura-Mater' 

### General

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_6_Plp1_Astrocyte-like_Ageing_Astrocytes_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General' 

# Level 7: Plp1_Astrocyte-like_Ageing_Astrocytes_General_General

In [None]:
set(['_'.join(x.split('_')[:8]) for x in adata[adata.obs['cell_type'] == 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General'].obs['current_leiden'].unique().tolist()])

In [None]:
adata.obs['comparison'] = 'ignore'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1_1_2'), 'comparison'] = 'cluster_2'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1_1_1'), 'comparison'] = 'cluster_1'
adata.obs.loc[adata.obs['current_leiden'].str.contains('^s_3_1_0_1_1_1_0'), 'comparison'] = 'cluster_0'
adata.obs.groupby(['comparison']).size()

In [None]:
adata_comparison = adata[adata.obs['comparison'] != 'ignore'].copy()
adata_comparison.layers['count'] = adata_comparison.X.copy()
sc.pp.normalize_total(adata_comparison, target_sum=1e4)
if 'log1p' in adata_comparison.uns.keys(): 
        del adata_comparison.uns['log1p']
sc.pp.log1p(adata_comparison)
adata_comparison.layers['lognorm'] = adata_comparison.X.copy()
adata_comparison.X = adata_comparison.layers['count'].copy()
sc.tl.rank_genes_groups(adata_comparison, 
                        groupby='comparison', 
                        use_raw=False,
                        layer='lognorm', 
                        method='wilcoxon', 
                        rankby_abs=True,
                        pts=True, 
                        key_added='level_7_Plp1_Astrocyte-like_Ageing_Astrocytes_General_General')

In [None]:
sc.pl.violin(adata_comparison, keys=['pct_counts_mt', 'pct_counts_in_top_100_genes', 'n_genes_by_counts', 'Plp1'], groupby='comparison')

In [None]:

fig, axs = plt.subplots(nrows=adata_comparison.obs['comparison'].unique().shape[0], ncols=7, figsize=(35, 10))
for row, cluster in enumerate(adata_comparison.obs['comparison'].unique()):
    for col, section in enumerate(adata_comparison.obs.sample_id.unique()):
        sc.pl.spatial(adata_comparison[adata_comparison.obs['sample_id'] == section].copy(), 
                     spot_size=25, 
                      size = 5,
                      groups = cluster,
                     color = 'comparison', 
                     ax=axs[row, col], 
                     show=False)
        

### Snap25+

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Plp1_Astrocyte-like_Ageing_Astrocytes_General_General', 
                           group = 'cluster_0')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_0'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Snap25+' 

### Oligodendrocytes

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Plp1_Astrocyte-like_Ageing_Astrocytes_General_General', 
                           group = 'cluster_1')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_1'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Oligodendrocytes' 

### Lower Complexity

In [None]:
df = sc.get.rank_genes_groups_df(adata_comparison, 
                           key = 'level_7_Plp1_Astrocyte-like_Ageing_Astrocytes_General_General', 
                           group = 'cluster_2')

df.loc[(df['pvals_adj'] < 0.01) & (df['pct_nz_group'] > 0.2)].sort_values('logfoldchanges', ascending=False).iloc[:20]

In [None]:
adata.obs.loc[adata.obs['comparison'].str.contains('^cluster_2'), 'cell_type'] = 'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Lower-Complexity' 

# Cell Type Breakdown

In [None]:
pd.set_option('display.max_rows', 500)

adata.obs.groupby(['cell_type']).size()

In [None]:
adata.obs['cell_type'] = adata.obs['cell_type'].astype('str')

In [None]:
adata.obs.loc[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity_Plp-_Malat1+_SN', 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1+_SN'
adata.obs.loc[adata.obs['cell_type'] == 'Dopaminergic_Higher-Complexity_Plp-_Malat1-_SN', 'cell_type'] = 'Dopaminergic_Higher-Complexity_Plp1-_Malat1-_SN'

### check for subclusters within a given cell type

In [None]:
set(['_'.join(x.split('_')[:6]) for x in adata[adata.obs['cell_type'] == 'Snap25_Thalamic'].obs['current_leiden'].unique().tolist()])

In [None]:
with open('input/adata/midbrain/adata_labelled_20220412.pickle', 'wb') as f:
    pickle.dump(adata, f)

In [2]:
with open('input/adata/midbrain/adata_labelled_20220412.pickle', 'rb') as f:
    adata = pickle.load(f)

### Filter cell types for further analysis

In [None]:
cell_types_to_remove = [
    'Dopaminergic_Higher-Complexity_Plp1+',
    'Dopaminergic_Lower-Complexity',
    'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Lower-Complexity',
    'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Snap25+',
    'Plp1_Astrocyte-like_Cathepsin-Tyrobp_Lower-Complexity',
    'Plp1_Erythroid_Snap25+',
    'Plp1_Plp1+++_Olig-Neurons',

In [8]:
pd.set_option('display.max_rows', 500)

adata.obs.groupby('cell_type').size()

cell_type
Cartpt neurons (Edinger-Westphal nucleus)                                                                                          153
Dopaminergic_Higher-Complexity_Plp1+                                                                                               309
Dopaminergic_Higher-Complexity_Plp1-_Malat1+_SN                                                                                    463
Dopaminergic_Higher-Complexity_Plp1-_Malat1+_VTA                                                                                   261
Dopaminergic_Higher-Complexity_Plp1-_Malat1-_SN                                                                                    843
Dopaminergic_Higher-Complexity_Plp1-_Malat1-_VTA                                                                                   765
Dopaminergic_Lower-Complexity                                                                                                      478
Plp1_Astrocyte-like_Ageing-Astrocytes_Dura-Ma

In [6]:
adata.obs['cell_type'].sort_values().unique()

array(['Cartpt neurons (Edinger-Westphal nucleus)',
       'Dopaminergic_Higher-Complexity_Plp1+',
       'Dopaminergic_Higher-Complexity_Plp1-_Malat1+_SN',
       'Dopaminergic_Higher-Complexity_Plp1-_Malat1+_VTA',
       'Dopaminergic_Higher-Complexity_Plp1-_Malat1-_SN',
       'Dopaminergic_Higher-Complexity_Plp1-_Malat1-_VTA',
       'Dopaminergic_Lower-Complexity',
       'Plp1_Astrocyte-like_Ageing-Astrocytes_Dura-Mater',
       'Plp1_Astrocyte-like_Ageing-Dura-Mater',
       'Plp1_Astrocyte-like_Ageing_Astrocytes_General_Dura-Mater',
       'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Lower-Complexity',
       'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Oligodendrocytes',
       'Plp1_Astrocyte-like_Ageing_Astrocytes_General_General_Snap25+',
       'Plp1_Astrocyte-like_Cathepsin-Tyrobp_Higher-Complexity-Plp1+',
       'Plp1_Astrocyte-like_Cathepsin-Tyrobp_Lower-Complexity',
       'Plp1_Erythroid_Erythroid', 'Plp1_Erythroid_Snap25+',
       'Plp1_Plp1+++_Ca

# Export counts matrix for pseudobulk DE

In [None]:
del adata.layers['counts']
del adata.layers['count']
del adata.layers['lognorm']
adata.write_h5ad('input/adata/midbrain/adata_labelled.h5ad')

In [None]:
import scipy.io as sio

In [None]:
sio.mmwrite('input/adata/adata_labelled.mtx', adata.X)

In [None]:
adata.obs.to_csv('input/adata/adata_labelled_meta.csv')

In [None]:
pd.DataFrame(adata.var_names).to_csv('input/adata/adata_labelled_var_names.csv', index=False)

In [None]:
sc.pl.violin(adata, keys=['Svs5'], groupby='cell_type')