<a href="https://colab.research.google.com/github/sbooeshaghi/azucar/blob/main/reference/brain/celltype/markers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet anndata

[?25l[K     |███▍                            | 10 kB 19.1 MB/s eta 0:00:01[K     |██████▉                         | 20 kB 2.4 MB/s eta 0:00:01[K     |██████████▎                     | 30 kB 3.5 MB/s eta 0:00:01[K     |█████████████▋                  | 40 kB 4.5 MB/s eta 0:00:01[K     |█████████████████               | 51 kB 4.8 MB/s eta 0:00:01[K     |████████████████████▌           | 61 kB 5.6 MB/s eta 0:00:01[K     |███████████████████████▉        | 71 kB 5.6 MB/s eta 0:00:01[K     |███████████████████████████▎    | 81 kB 6.3 MB/s eta 0:00:01[K     |██████████████████████████████▊ | 92 kB 6.1 MB/s eta 0:00:01[K     |████████████████████████████████| 96 kB 2.7 MB/s 
[?25h

In [2]:
!curl -Ls https://github.com/sbooeshaghi/azucar/blob/main/reference/brain/celltype/markers.h5ad?raw=true > markers.h5ad

In [3]:
import anndata
import json
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.preprocessing import normalize, scale
import glob

import numpy.lib.recfunctions as rf

def nd(arr):
    """Return `arr` as a flat, one-dimensional NumPy array.

    The input is converted with ``np.asarray`` and then reshaped to a single
    axis, so nested lists, scalars and array-likes are all accepted.
    """
    as_array = np.asarray(arr)
    return np.reshape(as_array, -1)

fsize=20

plt.rcParams.update({'font.size': fsize})
%config InlineBackend.figure_format = 'retina'

In [4]:
# Read the local markers.h5ad file into an AnnData object.
adata = anndata.read_h5ad("markers.h5ad")

In [5]:
# Display the AnnData summary (matrix dimensions and obs annotation columns).
adata

AnnData object with n_obs × n_vars = 5690 × 39
    obs: 'barcodes', 'celltype'

In [6]:
# Label every row of the matrix with its cell type so rows can be grouped.
expr_by_celltype = pd.DataFrame(
    adata.X,
    index=adata.obs["celltype"],
    columns=adata.var.index.values,
)
# Mean value of each column (adata.var entry) within each cell type.
mg_mean = expr_by_celltype.groupby("celltype").mean()
# Column positions of the 10 largest means per cell type; argsort is
# ascending, so the largest mean is the last entry of each row.
top_idx = np.argsort(mg_mean.values, axis=1)[:, -10:]

In [7]:
# positive value and top 10
# For each cell type, keep the var names at its top_idx positions whose mean
# is strictly positive; the order given by top_idx is preserved.
d = defaultdict(list)
for ct, row_means, t in zip(mg_mean.index.values, mg_mean.values, top_idx):
  mask = row_means[t] > 0
  d[ct] = adata.var.index.values[t][mask]

In [8]:
# Number of markers kept for each cell type, in insertion order of d.
[len(markers) for markers in d.values()]

[6, 8, 5, 7, 9, 5, 10, 5, 5, 4, 8, 6, 6, 9, 6, 5, 5, 6, 8, 6, 7, 3, 6, 6]

In [9]:
# Display the full mapping from cell type to its array of marker names.
d

defaultdict(list,
            {'Astro1': array(['Olig1', 'Gfap', 'Hepacam', 'Cxcl14', 'Apoe', 'Aldoc'],
                   dtype=object),
             'Astro2': array(['Plp1', 'Olig1', 'Reln', 'Cxcl14', 'Hepacam', 'Apoe', 'Aldoc',
                    'Gfap'], dtype=object),
             'COPs': array(['Olig1', 'C1ql1', 'Olig2', 'Neu4', 'Plp1'], dtype=object),
             'ChorPlex': array(['Hydin', 'Riiad1', 'Apoe', 'Foxj1', 'Slc4a5', 'Sostdc1', 'Aqp1'],
                   dtype=object),
             'DivOPCs': array(['Neu4', 'Cxcl14', 'C1ql1', 'Olig1', 'Olig2', 'Rrm2', 'Pdgfra',
                    'Top2a', 'Ccna2'], dtype=object),
             'Endo': array(['Top2a', 'Ccna2', 'Kcnj8', 'Ptprb', 'Cldn5'], dtype=object),
             'Ependymal': array(['Aldoc', 'Gfap', 'Reln', 'Hepacam', 'Top2a', 'Apoe', 'Npy',
                    'Hydin', 'Riiad1', 'Foxj1'], dtype=object),
             'ExcitNeuron1': array(['Npy', 'Dcn', 'Bhlhe22', 'Grin2b', 'Neurod6'], dtype=object),
             '

In [10]:
# Cell type names, in the order they were inserted into d.
list(d)

['Astro1',
 'Astro2',
 'COPs',
 'ChorPlex',
 'DivOPCs',
 'Endo',
 'Ependymal',
 'ExcitNeuron1',
 'ExcitNeuron2',
 'ExcitNeuron3',
 'GranuleNB1',
 'GranuleNB2',
 'GranuleNB3',
 'InhibNeuron1',
 'InhibNeuron2',
 'InhibNeuron3',
 'Macrophages',
 'Microglia',
 'NIPCs',
 'OPCs',
 'RGL',
 'Vasc1',
 'Vasc2',
 'Vasc3']

In [11]:
# Write one line per cell type: "<celltype>\t<marker1>,<marker2>,...".
with open("markers.txt", "w") as f:
    for k, v in d.items():
        # str.join places commas only between entries, replacing the manual
        # "not the last element" check; str() mirrors the f-string formatting
        # that was applied to each marker.
        joined = ",".join(map(str, v))
        f.write(f"{k}\t{joined}\n")

In [12]:
# Print the contents of markers.txt to inspect the file.
!cat markers.txt

Astro1	Olig1,Gfap,Hepacam,Cxcl14,Apoe,Aldoc
Astro2	Plp1,Olig1,Reln,Cxcl14,Hepacam,Apoe,Aldoc,Gfap
COPs	Olig1,C1ql1,Olig2,Neu4,Plp1
ChorPlex	Hydin,Riiad1,Apoe,Foxj1,Slc4a5,Sostdc1,Aqp1
DivOPCs	Neu4,Cxcl14,C1ql1,Olig1,Olig2,Rrm2,Pdgfra,Top2a,Ccna2
Endo	Top2a,Ccna2,Kcnj8,Ptprb,Cldn5
Ependymal	Aldoc,Gfap,Reln,Hepacam,Top2a,Apoe,Npy,Hydin,Riiad1,Foxj1
ExcitNeuron1	Npy,Dcn,Bhlhe22,Grin2b,Neurod6
ExcitNeuron2	Dcn,Igfbpl1,Bhlhe22,Neurod6,Grin2b
ExcitNeuron3	Bhlhe22,Grin2b,Calb2,Neurod6
GranuleNB1	Npy,Dcn,Sst,Igfbpl1,Neurod6,Neurod1,Grin2b,Bhlhe22
GranuleNB2	Grin2b,Neurod6,Bhlhe22,Calb2,Igfbpl1,Neurod1
GranuleNB3	Hydin,Bhlhe22,Neurod6,Calb2,Neurod1,Igfbpl1
InhibNeuron1	Cxcl14,Grin2b,Reln,Sp9,Sst,Npy,Slc32a1,Gad2,Lhx6
InhibNeuron2	Reln,Grin2b,Igfbpl1,Slc32a1,Gad2,Sp9
InhibNeuron3	Hydin,Bhlhe22,C1ql1,Calb2,Reln
Macrophages	Top2a,Reln,Tmem119,Trem2,Apoe
Microglia	Ccna2,Rrm2,Top2a,Apoe,Tmem119,Trem2
NIPCs	Sp9,Olig1,Igfbpl1,Hepacam,Olig2,Top2a,Ccna2,Rrm2
OPCs	Cxcl14,Neu4,Olig2,Olig1,C1ql1,Pdgfra
RGL