## Marker preparation from CellMarker v1
- ### Monocyte: "Monocyte"
- ### NK: "Natural killer cell"
- ### Naive B: "Naive B cell","Resting naive B cell"
- ### Memory B: "Memory B cell","Resting memory B cell"
- ### Memory CD4: "Memory CD4+ T cell"
- ### Naive CD4: "Naive CD4 T cell","Naive CD4+ T cell"
- ### CD8: "Activated CD8+ T cell","Activated naive CD8+ T cell","Activated memory CD8+ T cell","CD8 T cell","CD8+ T cell","Memory CD8 T cell","Memory CD8+ T cell","Naive CD8 T cell","Naive CD8+ T cell"
- ### γδ: "Gamma delta(γδ) T cell"

In [1]:
import pandas as pd
import codecs
import collections

# Root directory of the locally cloned GLDADec repository; the CellMarker CSV
# is read relative to this path.
Base_dir = '/mnt/AzumaDeconv/github/GLDADec'

In [2]:
# Read the complete CellMarker table. The file is decoded as Shift-JIS and
# bytes that cannot be decoded are dropped ("ignore") instead of raising.
cellmarker_csv = Base_dir + '/data/domain_info/CellMarker/Cell_marker_All.csv'
with codecs.open(cellmarker_csv, "r", "Shift-JIS", "ignore") as file:
    total_ref = pd.read_csv(file, sep=",")

# Narrow down in two steps: human entries first, then peripheral blood only.
# (The numbers are the author's original notes, presumably row counts.)
is_human = total_ref["species"] == "Human"
human_ref = total_ref[is_human]  # 60877
is_peripheral_blood = human_ref["tissue_type"] == "Peripheral blood"
pbmc_ref = human_ref[is_peripheral_blood]  # 2731

In [3]:
def _cells_named(*cell_names):
    """Return the rows of ``pbmc_ref`` whose "cell_name" is one of *cell_names*."""
    return pbmc_ref[pbmc_ref["cell_name"].isin(list(cell_names))]


def _symbols_of(ref):
    """Return the distinct, non-missing "Symbol" values of *ref* as a list."""
    return ref["Symbol"].dropna().unique().tolist()


# Monocyte
mon_ref = _cells_named("Monocyte")
sym_mon = _symbols_of(mon_ref)

# NK
nk_ref = _cells_named("Natural killer cell")
sym_nk = _symbols_of(nk_ref)

# B (naive / memory)
b_n_ref = _cells_named("Naive B cell", "Resting naive B cell")
b_m_ref = _cells_named("Memory B cell", "Resting memory B cell")
sym_bn = _symbols_of(b_n_ref)
sym_bm = _symbols_of(b_m_ref)

# CD4 (memory / naive)
cd4_m_ref = _cells_named("Memory CD4+ T cell")
cd4_n_ref = _cells_named("Naive CD4 T cell", "Naive CD4+ T cell")
sym_cd4n = _symbols_of(cd4_n_ref)
sym_cd4m = _symbols_of(cd4_m_ref)

# CD8: all activated / memory / naive CD8 labels are pooled into one group
cd8_ref = _cells_named(
    "Activated CD8+ T cell",
    "Activated naive CD8+ T cell",
    "Activated memory CD8+ T cell",
    "CD8 T cell",
    "CD8+ T cell",
    "Memory CD8 T cell",
    "Memory CD8+ T cell",
    "Naive CD8 T cell",
    "Naive CD8+ T cell",
)
sym_cd8 = _symbols_of(cd8_ref)

# gamma delta
gd_ref = _cells_named("Gamma delta(γδ) T cell")
sym_gd = _symbols_of(gd_ref)

In [4]:
# Collect the registered markers of every cell type into one dictionary.
# `a` (marker lists) and `cells` (dictionary keys) are paired by position, so
# the two lists must be kept in the same order.
a = [sym_mon,sym_nk,sym_bn,sym_bm,sym_cd4n,sym_cd4m,sym_cd8,sym_gd]
cells = ["Monocytes","NK cells","B cells naive","B cells memory","T cells CD4 naive","T cells CD4 memory","T cells CD8","T cells gamma delta"]
cellmarker_dic_v1 = dict(zip(cells,a))

# Write next to the input data, relative to Base_dir, so that pointing
# Base_dir at another clone moves the output together with the input.
pd.to_pickle(cellmarker_dic_v1, Base_dir + '/data/domain_info/human_PBMC_CellMarker_8cell_raw_dic_v1.pkl')

In [5]:
# Report how many markers were gathered for each cell type.
for cell_type, markers in cellmarker_dic_v1.items():
    print('---',cell_type,'---')
    print(len(markers),'markers were collected')
    print()

--- Monocytes ---
71 markers were collected

--- NK cells ---
50 markers were collected

--- B cells naive ---
13 markers were collected

--- B cells memory ---
7 markers were collected

--- T cells CD4 naive ---
41 markers were collected

--- T cells CD4 memory ---
14 markers were collected

--- T cells CD8 ---
123 markers were collected

--- T cells gamma delta ---
17 markers were collected



***
### Extract cell-specific markers

In [6]:
# Detect cell-specific markers: symbols that occur exactly once in the
# concatenation of all eight marker lists.
merge_marker = sym_mon + sym_nk + sym_bn + sym_bm + sym_cd4n + sym_cd4m + sym_cd8 + sym_gd
count_dic = dict(collections.Counter(merge_marker))
# (symbol, count) pairs ordered by ascending count.
sort_count = sorted(count_dic.items(),key=lambda x : x[1])
unique_marker = [symbol for symbol, n_hits in sort_count if n_hits == 1]

# Keep, for every cell type, only its markers that are unique overall.
# Filtering each list in its own order (rather than intersecting sets) makes
# the stored lists reproducible: set iteration order of strings can differ
# between interpreter runs. The lookup set is built once, outside the loop.
unique_lookup = set(unique_marker)
b = [[symbol for symbol in markers if symbol in unique_lookup] for markers in a]
cellmarker_spe_dic_v1 = dict(zip(cells,b))

# Write next to the input data, relative to Base_dir, so that pointing
# Base_dir at another clone moves the output together with the input.
pd.to_pickle(cellmarker_spe_dic_v1, Base_dir + '/data/domain_info/human_PBMC_CellMarker_8cell_spe_dic_v1.pkl')

In [7]:
# Report how many cell-specific markers remain for each cell type.
for cell_type, markers in cellmarker_spe_dic_v1.items():
    print('---',cell_type,'---')
    print(len(markers),'markers were collected')
    print()

--- Monocytes ---
49 markers were collected

--- NK cells ---
23 markers were collected

--- B cells naive ---
8 markers were collected

--- B cells memory ---
3 markers were collected

--- T cells CD4 naive ---
13 markers were collected

--- T cells CD4 memory ---
2 markers were collected

--- T cells CD8 ---
83 markers were collected

--- T cells gamma delta ---
4 markers were collected

