### Import packages and filepaths

In [None]:
import scanpy as sc
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys

In [None]:
#This file is currently available from https://figshare.com/articles/dataset/Tabula_Sapiens_release_1_0/14267219?file=40067134
#Download it from the link above, then change the path below to wherever you saved it on your system
ts_path="/oak/stanford/groups/ckuo/Shawn_shared/Tabula_Sapiens_Analysis/TabulaSapiens.h5ad"

In [None]:
#change the following path to a folder on your system you would like to output files to
save = "/scratch/users/sschulz/shared"

In [None]:
#change the following path to the path to your Tabula Analysis folder
tabula_analysis_path="/scratch/users/sschulz/shared/Tabula_Analysis"

In [None]:
#this path is not currently used to generate data used in the publication
#gaf="/scratch/users/sschulz/Glia_analysis/"

### Read the adata

In [None]:
all_ts = sc.read_h5ad(ts_path)

### Change the defaults of scanpy's fig generation

In [None]:
sc.set_figure_params(dpi=100, dpi_save=1000)

### Check leiden

In [None]:
sc.tl.leiden(all_ts, resolution=0.7)

In [None]:
sc.pl.dotplot(all_ts, groupby='leiden', swap_axes=True, use_raw=False, var_names=['SOX10', 'NCAM1', 'PLP1', 'S100B', 'GFRA3',"GFAP"], dot_max=1, log=False,save = "_filter_dotplot_leiden_0.7_all_ts.pdf", cmap='Reds')


In [None]:
all_ts.obs.to_csv(save + "/" + "all_ts_metadata.csv")

### Make UMAP plots for genes of interest and save

In [None]:
def colUMAP(gene):
    """Plot the UMAP of ``all_ts`` colored by one gene and save it as PDF.

    Two copies are written: ``umap_<gene>.pdf`` in scanpy's figure directory
    (the file that ``save="_<gene>.pdf"`` used to produce) and
    ``UMAP_<gene>.pdf`` in the user-configured ``save`` folder.

    Parameters
    ----------
    gene : str
        Gene to color the embedding by; also used in both file names.
    """
    # Ask scanpy for the Figure instead of passing ``save=``: scanpy closes the
    # figure once it has saved it, so a later ``plt.savefig`` would write an
    # empty canvas to the output folder.
    fig = sc.pl.umap(all_ts, color=gene, use_raw=False, cmap='RdYlBu_r', vmax=10, return_fig=True)
    os.makedirs(sc.settings.figdir, exist_ok=True)
    fig.savefig(os.path.join(sc.settings.figdir, "umap_" + gene + ".pdf"), bbox_inches="tight")
    fig.savefig(save + "/" + "UMAP_" + gene + ".pdf", bbox_inches="tight")
    # Still display the plot in the notebook, then release the figure so a long
    # gene list does not accumulate open figures.
    plt.show()
    plt.close(fig)

In [None]:
# Genes of interest; one UMAP per gene is drawn and saved by colUMAP.
gene_list = [
    'SOX10', 'PLP1', 'S100B', 'NCAM1', 'GFAP', 'PLLP', 'MPZ', 'MBP',
    'PMP22', 'NCMAP', 'CRYAB', 'CD14', "FCGR3A", 'CCR2', 'CDH19',
    "LGI4", "MYOT", 'CDKN1C', 'SCN7A', 'GFRA3', 'APOE', 'NRXN1',
]
for gene in gene_list:
    colUMAP(gene)

In [None]:
all_ts.obs

### Load annotations from the manually circled cluster

In [None]:
#this csv was generated by manually circling the cluster of interest using cellxgene: https://cellxgene.cziscience.com/
ann_df = pd.read_csv(tabula_analysis_path + "/tabula_sapiens_analysis/" + "tabula_sapiens-cell-labels-XMFNVFXU.csv", skiprows=2)

In [None]:
ann_df['glial_cells'].value_counts()

In [None]:
# Cell ids of every row that was labelled 'glia' in the annotation table.
glia_index = ann_df.loc[ann_df['glial_cells'] == 'glia', 'cell_id']

### Highlight cells of interest and save subset

In [None]:
# Start with every cell labelled 'non_glia', stored as a categorical that also
# allows the 'glia_candidates' label.
all_ts.obs['glial_highlight'] = (
    pd.Series('non_glia', index=all_ts.obs.index)
    .astype('category')
    .cat.set_categories(['non_glia', 'glia_candidates'])
)

# Relabel the annotated cells with a single .loc call on the DataFrame.
# Chained indexing (obs[col].loc[rows] = ...) may assign into a temporary copy
# and leave the column unchanged.
all_ts.obs.loc[glia_index, 'glial_highlight'] = 'glia_candidates'

In [None]:
sc.pl.umap(all_ts, color = 'glial_highlight', save='_all_cells_glia_highlight', palette={'non_glia':'grey','glia_candidates':'orange'})
                                                                                         

In [None]:
subset = all_ts[all_ts.obs['glial_highlight'] == 'glia_candidates']

In [None]:
subset.write_h5ad(save + '/' + "glia_689_subset.h5ad")

In [None]:
subset.obs

In [None]:
#save counts as well
subset.obs['organ_tissue'].value_counts().to_frame('counts').to_csv(save + '/' + 'glia_candidate_counts.csv')

### Ranked genes group calculations (not currently used in publication)

In [None]:
## This takes a long time; only run it again when you're ready for your notebook to hang for a while.
# Rank genes between the groups of 'glial_highlight' with a Wilcoxon test.
sc.tl.rank_genes_groups(all_ts, groupby='glial_highlight', use_raw=False, method='wilcoxon')
# Outputs go to the configured `save` folder instead of machine-specific paths.
all_ts.write_h5ad(save + "/" + "all_ts_ranked_genes.h5ad")
# `groups` must name groups of the column the ranking was computed on
# ('glial_highlight'), not another obs column such as 'leiden'.
sc.pl.rank_genes_groups(all_ts, groups=['glia_candidates'], save="all_rank_gene_plot")

# Export the ranking for the highlighted candidate cells.
sc.get.rank_genes_groups_df(all_ts, group='glia_candidates').to_csv(save + "/" + "ts_689_rank_genes.csv")

### Clustered heatmaps (not currently used in publication)

In [None]:
import seaborn

In [None]:
import fastcluster

In [None]:
before_subset

In [None]:
# Load the differential-gene table and pull out its two marker columns.
# NOTE(review): `gaf` only appears as a commented-out assignment near the top
# of this notebook, so it must be defined before this cell can run.
df = pd.read_csv(gaf + 'diff_genes.csv')

# Only the myelinating column has its missing entries dropped.
diff_mye = df['Myelinating'].dropna()
diff_non = df['Non-Myelinating']

In [None]:
myelinating = ['ERBB3', 'S100B', 'PLP1', 'SOX10', 'MBP', 'MPZ', 'PTN', 'CRYAB', 'NCMAP', 'PLLP', 'PRX', 'PMP22', 'MAG', 'EGR2', "APOE"]
non_myelinating = ['ERBB3', 'GFRA3', 'S100B', 'PLP1', 'SOX10', 'NGFR', 'SCN7A', 'APOD', 'SMOC2', 'ECM1', 'NCAM1', 'L1CAM', 'NRXN1']
features=['SOX10', 'NCAM1', 'PLP1']


In [None]:
#This code block gets lists of classic glia markers
def intersection(list1, list2):
    """Display and return the items found in both inputs.

    The common items are shown with ``display`` and returned as a list whose
    order follows set iteration order.
    """
    common = set(list1) & set(list2)
    display(common)
    return list(common)
def filterSet(list1, list2):
    """Return the items of ``list1`` that do not occur in ``list2``.

    The overlap of the two inputs is displayed as a side effect (through
    ``intersection``); the returned list follows set iteration order.
    """
    # Called only for its display side effect; the result is not used.
    intersection(list1, list2)
    remaining = set(list1).difference(set(list2))
    return list(remaining)
# Overlap of the two differential-gene columns; this value is replaced by the
# very next statement.
shared = set(diff_mye) & set(diff_non)
# Overlap of the two hand-curated marker lists; this is the value used below.
shared = set(myelinating) & set(non_myelinating)
def order_unique(seq):
    """Return the items of ``seq`` without duplicates, in first-seen order."""
    seen = set()
    unique = []
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique
# Add the shared markers to the common feature list (duplicates are kept).
features.extend(shared)
# Genes of each differential column that are not already in `features`.
mye = filterSet(diff_mye, features)
non = filterSet(diff_non, features)
# NOTE(review): `shared` is a set, so the order in which its genes were
# appended (and therefore which gene sits at index 6) is not fixed — confirm
# which entry is meant to be removed here.
del features[6]
def fixOrder(list1, list2):
    """Return the items of ``list1`` that are in ``list2``, in ``list1`` order."""
    kept = []
    for item in list1:
        if item in list2:
            kept.append(item)
    return kept
# Put the filtered genes back into the order of their source columns.
mye = fixOrder(diff_mye, mye)
non = fixOrder(diff_non, non)
# Marker genes grouped by category.
geneDict = {
    'Shared': features,
    'Myelinating': mye,
    'Non-Myelinating': non,
}

In [None]:
l = list(geneDict.values())

In [None]:
all_features = [item for sublist in l for item in sublist]


In [None]:
def flatten(l):
    """Concatenate the sub-iterables of ``l`` into one flat list (one level deep)."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [None]:
stain_features = ['GFAP', 'GFRA3', 'SOX10', 'S100B', 'S100A', 'MBP', 'KCNA2', 'KCNA6']

In [None]:
stain = no_glia[:,no_glia.var.index.isin(stain_features)]

In [None]:
stain = stain.copy().transpose()

In [None]:
colors = {'0_Bladder':'#97c3b4', '0_Eye':'#e85535', '0_Large_Intestine':'#3bc587', '0_Lung':'#9775fa', '0_Muscle':'#1c034d',
       '0_Pancreas':'#4ce88a', '0_Prostate':'#9e1977', '0_Small_Intestine':'#cfc8d6', '0_Thymus':'#cfc8d6',
       '2_Large_Intestine':'#651665', '2_Lung':'#ed78ed', '2_Prostate':'#375292', '2_Salivary_Gland':'#6f9269',
       '2_Tongue':'#c21290'}

In [None]:
stain.var['classes_colors'] = stain.var['classes'].replace(colors)

In [None]:
# Clustered heatmap of the staining genes (rows of the transposed `stain`)
# with one color per column taken from `classes_colors`.
heatmap = seaborn.clustermap(
    stain.X.todense(),
    cmap='RdYlBu_r',
    row_cluster=True,
    yticklabels=list(stain.obs.index),
    col_colors=stain.var['classes_colors'].to_numpy(),
)
heatmap.tick_params(axis='both', which='major', labelsize=5)
fig = heatmap.fig
fig.savefig(save + "/" + "ts_glia_stain_genes_clustered_heatmap_less_genes.pdf")

In [None]:
bladder = no_glia[no_glia.obs['tissue_in_publication'] == 'Bladder']

In [None]:
bstain

In [None]:
# Keep only the staining genes of the bladder cells, then transpose so that
# genes become observations.
bstain = bladder[:, bladder.var.index.isin(stain_features)].copy().transpose()
# Color assigned to each value of the 'classes' column.
colors = {
    '0_Bladder': '#97c3b4',
    '0_Eye': '#e85535',
    '0_Large_Intestine': '#3bc587',
    '0_Lung': '#9775fa',
    '0_Muscle': '#1c034d',
    '0_Pancreas': '#4ce88a',
    '0_Prostate': '#9e1977',
    '0_Small_Intestine': '#cfc8d6',
    '0_Thymus': '#cfc8d6',
    '2_Large_Intestine': '#651665',
    '2_Lung': '#ed78ed',
    '2_Prostate': '#375292',
    '2_Salivary_Gland': '#6f9269',
    '2_Tongue': '#c21290',
}
bstain.var['classes_colors'] = bstain.var['classes'].replace(colors)

In [None]:
# Clustered heatmap of the staining genes restricted to bladder cells.
# The tick labels and column colors are taken from `bstain` itself so that
# they line up with the rows and columns of `bstain.X`; taking them from the
# all-tissue `stain` object would pair the matrix with colors for a different
# set of cells.
heatmap = seaborn.clustermap(
    bstain.X.todense(),
    yticklabels=list(bstain.obs.index),
    row_cluster=True,
    col_colors=bstain.var['classes_colors'].to_numpy(),
    cmap='RdYlBu_r',
)
heatmap.tick_params(axis='both', which='major', labelsize=5)
fig = heatmap.fig
fig.savefig(save + "/" + "ts_glia_bladder_genes_clustered_heatmap_less_genes.pdf")

In [None]:
feat_subset = no_glia[:,no_glia.var.index.isin(all_features)]

In [None]:
flipped = feat_subset.copy().transpose()

In [None]:
np.unique(flipped.var['classes'])

In [None]:
len(np.unique(flipped.var['classes']))

In [None]:
colors = {'0_Bladder':'#97c3b4', '0_Eye':'#e85535', '0_Large_Intestine':'#3bc587', '0_Lung':'#9775fa', '0_Muscle':'#1c034d',
       '0_Pancreas':'#4ce88a', '0_Prostate':'#9e1977', '0_Small_Intestine':'#cfc8d6', '0_Thymus':'#cfc8d6',
       '2_Large_Intestine':'#651665', '2_Lung':'#ed78ed', '2_Prostate':'#375292', '2_Salivary_Gland':'#6f9269',
       '2_Tongue':'#c21290'}

In [None]:
flipped.var['classes_colors'] = flipped.var['classes'].replace(colors)

In [None]:
def helper(row):
    """Return the marker-group label for a row, based on its 'feature_name'.

    The groups of ``geneDict`` are checked in the order Shared, Myelinating,
    Non-Myelinating; ``None`` is returned when the gene is in none of them.
    """
    for group, label in (
        ('Shared', 'Common'),
        ('Myelinating', 'Myelinating'),
        ('Non-Myelinating', 'Non-Myelinating'),
    ):
        if row['feature_name'] in geneDict[group]:
            return label
    return None
def simple(row):
    """Thin wrapper around ``helper``."""
    return helper(row)
# Label every gene row of `flipped` with its marker group.
flipped.obs['glia_type'] = flipped.obs.apply(helper, axis=1).astype('category')

In [None]:
def helper(row):
    """Return the hex color of the marker group a row's 'feature_name' is in.

    The groups of ``geneDict`` are checked in the order Shared, Myelinating,
    Non-Myelinating; ``None`` is returned when the gene is in none of them.
    """
    for group, color in (
        ('Shared', '#29d797'),
        ('Myelinating', '#c37b79'),
        ('Non-Myelinating', '#2e36c4'),
    ):
        if row['feature_name'] in geneDict[group]:
            return color
    return None
def simple(row):
    """Thin wrapper around ``helper``."""
    return helper(row)
# Store the group color of every gene row of `flipped`.
flipped.obs['colors'] = flipped.obs.apply(helper, axis=1).astype('category')

In [None]:
flipped = flipped[flipped.obs.sort_values('glia_type').index]

In [None]:
colorbar= {'#29d797':'Shared', '#c37b79':'Myelinating', '#2e36c4':'Non-Myelinating'}

In [None]:
no_cns = {'Common': ['SOX10', 'PLP1', 'NCAM1'],
 'MSC': ['PLLP', 'MPZ', 'MBP', 'GFAP'],
 'NMSC': ['SCN7A', 'GFRA3', 'PTPRZ1']}

In [None]:
small = flipped[flipped.obs.index.isin(flatten(list(no_cns.values()))),:]

In [None]:
# Annotate the reduced marker panel `small` with a glia-type label and the
# matching row color, both derived from the `no_cns` gene groups.
def helper(row):
    """Return the glia-type label of a row's 'feature_name' (None if in no `no_cns` group)."""
    if row['feature_name'] in no_cns['Common']:
        return 'Common'
    if row['feature_name'] in no_cns['MSC']:
        return 'Myelinating'
    if row['feature_name'] in no_cns['NMSC']:
        return 'Non-Myelinating'
def simple(row):
    return helper(row)
# Write the labels onto `small`, the object this cell is annotating. Assigning
# them to `flipped` would overwrite its existing 'glia_type' column with
# missing values for every gene that is not part of `no_cns`.
small.obs['glia_type'] = small.obs.apply(helper, axis=1).astype('category')
def helper(row):
    """Return the hex color of a row's `no_cns` group (None if in no group)."""
    if row['feature_name'] in no_cns['Common']:
        return '#29d797'
    if row['feature_name'] in no_cns['MSC']:
        return '#c37b79'
    if row['feature_name'] in no_cns['NMSC']:
        return '#2e36c4'
def simple(row):
    return helper(row)
small.obs['shorter colors'] = small.obs.apply(helper, axis=1).astype('category')

In [None]:
# Heatmap of the reduced marker panel: rows keep their current order (no row
# clustering) and carry the per-gene colors from 'shorter colors'.
heatmap = seaborn.clustermap(
    small.X.todense(),
    cmap='RdYlBu_r',
    row_cluster=False,
    yticklabels=list(small.obs.index),
    row_colors=small.obs['shorter colors'].to_numpy(),
    col_colors=small.var['classes_colors'].to_numpy(),
)
heatmap.tick_params(axis='both', which='major', labelsize=5)
fig = heatmap.fig
fig.savefig(save + "/" + "ts_glia_small_clustered_heatmap_less_genes.pdf")

In [None]:
# Clustered heatmap of the full marker panel, with per-gene row colors and
# per-column class colors.
heatmap = seaborn.clustermap(
    flipped.X.todense(),
    cmap='RdYlBu_r',
    yticklabels=list(flipped.obs.index),
    row_colors=flipped.obs['colors'].to_numpy(),
    col_colors=flipped.var['classes_colors'].to_numpy(),
)
heatmap.tick_params(axis='both', which='major', labelsize=5)
fig = heatmap.fig
fig.savefig(save + "/" + "ts_glia_big_clustered_heatmap_many_genes.pdf")