# Markers for cluster annotations across modalities

## RNA

In [None]:
# RNA marker genes used for cluster annotation.
# Maps each cell-type label to the list of gene symbols inspected for it.
# Some listed genes are *negative* markers (expected to be absent/low in that
# cell type); these are called out in the comment next to the entry.
marker_genes = {
    "CD14+ Mono": ["FCN1", "CD14"],
    "CD16+ Mono": ["TCF7L2", "FCGR3A", "LYN"],
    "ID2-hi myeloid prog": [
        "CD14",
        "ID2",
        "VCAN",
        "FOS",
        "JUN",
        "EGR1",
        "KLF4",
        "PLAUR",
    ],
    "cDC1": ["CLEC9A", "CADM1"],
    "cDC2": [
        "CLEC10A",
        "FCER1A",
        "CST3",
        "COTL1",
        "LYZ",
        "DMXL2",
    ],  # Note: DMXL2 should be negative
    # Reticulocyte and Normoblast share the same gene list; they differ only
    # in the expected sign of TFRC.
    "Reticulocyte": [
        "SLC4A1",
        "SLC25A37",
        "HBB",
        "HBA2",
        "HBA1",
        "TFRC",
    ],  # TFRC negative
    "Normoblast": [
        "SLC4A1",
        "SLC25A37",
        "HBB",
        "HBA2",
        "HBA1",
        "TFRC",
    ],  # TFRC positive
    "Erythroblast": ["MKI67", "HBA1", "HBB"],
    "Proerythroblast": [
        "CDK6",
        "SYNGR1",
        "HBM",
        "GYPA",
    ],  # Note HBM and GYPA are negative markers   # CRACD as well, but not present in data
    "NK": [
        "GNLY",
        "NKG7",
        "CD247",
        "FCER1G",
        "TYROBP",
        "KLRG1",
        "FCGR3A",
    ],  # GRIK4 as well, but not present in data
    "ILC": ["ID2", "PLCG2", "GNLY", "SYNE1"],
    "Lymph prog": [
        "IGLL1",
        "VPREB1",
        "MME",
        "EBF1",
        "SSBP2",
        "BACH2",
        "CD79B",
        "IGHM",
        "PAX5",
        "PRKCE",
        "DNTT",
    ],  # new: STMN1
    # The IGKC+ / IGKC- variants list the same genes; IGKC is a positive
    # marker for the former and a negative marker for the latter.
    "Naive CD20+ B - IGKC+": ["MS4A1", "IL4R", "IGHD", "FCRL1", "IGHM", "IGKC"],
    "Naive CD20+ B - IGKC-": [
        "MS4A1",
        "IL4R",
        "IGHD",
        "FCRL1",
        "IGHM",
        "IGKC",
    ],  # IGKC negative
    "B1 B - IGKC+": [
        "MS4A1",
        "SSPN",
        "ITGB1",
        "IGKC",
        "EPHA4",
        "COL4A4",
        "PRDM1",
        "IRF4",
        "CD38",
        "XBP1",
        "PAX5",
        "BCL11A",
        "BLK",
        "IGHD",
        "IGHM",
    ],  # Note IGHD and IGHM are negative markers #ZNF215 not in data
    "B1 B - IGKC-": [
        "MS4A1",
        "SSPN",
        "ITGB1",  # was misspelled "ccITGB1", which matches no gene symbol
        "EPHA4",
        "COL4A4",
        "PRDM1",
        "IRF4",
        "CD38",
        "XBP1",
        "PAX5",
        "BCL11A",
        "BLK",
        "IGHD",
        "IGHM",
        "IGKC",
    ],  # Note IGKC, IGHD and IGHM are negative markers #ZNF215 not in data
    "Transitional B": ["MME", "CD38", "CD24", "ACSM3", "MSI2"],
    "Plasma cell": ["MZB1", "HSP90B1", "FNDC3B", "PRDM1", "IGKC", "JCHAIN"],
    "Plasmablast": [
        "XBP1",
        "PRDM1",
        "PAX5",
    ],  # Note PAX5 is a negative marker   # RF4 as well, but not present in data
    # NOTE(review): "RF4" in the comment above may be a typo for IRF4 - confirm.
    "CD4+ T activated": ["CD4", "IL7R", "TRBC2", "ITGB1"],
    "CD4+ T naive": ["CD4", "IL7R", "TRBC2", "CCR7"],
    # GZMA used to appear twice in this list; the duplicate was dropped.
    "CD8+ T": ["CD8A", "CD8B", "GZMB", "GZMA", "CCL5", "GZMK", "GZMH"],
    "T activation": ["CD69", "CD38"],  # CD69 much better marker!
    "T naive": ["LEF1", "CCR7", "TCF7"],
    "T reg": ["FOXP3"],
    "Effector CD4+ T": ["KLRB1", "CD4", "PDCD1", "TNF"],
    "Effector CD8+ T": ["KLRK1", "GZMH", "CD8A", "CD8B"],
    "MAIT": ["KLRB1", "CD8A", "GZMK", "IL7R"],
    "gdT": ["KLRC2", "KLRF1", "FCGR3A", "GZMB", "GZMH"],
    "pDC": ["GZMB", "IL3RA", "COBLL1", "TCF4"],
    "G/M prog": ["MPO", "BCL2", "KCNQ5", "CSF3R", "PRTN3"],
    "HSC": ["NRIP1", "MECOM", "PROM1", "CD34", "NKAIN2"],
    "MK/E prog": [
        "ZNF385D",
        "ITGA2B",
        "PLCB1",
    ],  # 'RYR3' as well,  but not present in data
}

## ADT

In [None]:
marker_prots = {
    "CD14+ Mono": ["CD14", "CD11b", "CD62P"],
    "CD16+ Mono": ["CD85j", "CD11c", "CD172a", "CD88"],
    "cDC2": ["CD1c", "FceRIa", "CD11c"],
    "pDC": ["CD303", "CD304"],
    # Ery linage
    "Proerythroblast": ["CD105"],
    "Erythroblast": ["CD82", "CD71"],  # CD82 also in B
    "Normoblast/Reticulocyte": ["CD71"],
    # B linage
    "naive CD20+ B": ["IgD"],
    "B1 B": ["CD40", "CD35", "CD268", "IgD"],  # (IgD- !)
    "transitional B": ["CD38", "CD24"],  # 'CD24', 'CD81', 'CD9'
    "Plasma cell": ["CD63", "CD319"],
    "Plasmablast": ["CD39", "CD319"],  # CD86
    # NK
    "NK": ["CD56", "CD94", "CD335"],  # 'CD57'?
    "NK CD158e1+": ["CD158e1"],
    # Progenitors
    "MK/E prog": ["CD105", "CD112", "CD352"],
    "HSC": ["CD112", "CD13", "CD155"],
    "Lymph prog": ["CD112", "CD81"],  # close to HSC
    "G/M prog": ["CD112", "CD86"],
    # CD4
    "CD4+ T naive": ["CD45RA", "CD4"],  # close to HSC
    "T reg": ["CD25", "CD39", "CD4"],
    "CD4+ T activated": ["CD45RO", "CD4"],  # close to HSC
    "CD4+ T activated CD279+": ["CD279", "CD45RO", "CD4", "CD25"],  # CD25 negative
    "CD4+ T activated CD194+": ["CD194", "CD45RO", "CD4"],
    "CD4+ T activated integrinB7+": ["integrinB7", "CD45RO", "CD4"],
    "CD4+ T CD45RA+ CD314+": ["CD11c", "CD314", "CD45RA", "CD4"],  # 'CD11c' negative
    # CD8, gdT and other
    "CD8+ T naive CD127+ CD26+ CD101+": ["CD127", "CD26", "CD101", "CD45RA", "CD8"],
    "CD8+ T naive CD127+ CD26- CD101-": ["CD127", "CD26", "CD101", "CD45RA", "CD8"],
    "ILC1": ["CD103", "CD127"],
    "MAIT": ["TCRVa7.2", "CD8"],
    "gdT TCRVD2+": ["TCRVd2", "CD161"],
    "gdT CD56+": ["CD56", "CD158b"],
    "gdT Vd1 CD57+": ["CD57", "CD158b"],
    "gdT CD158b+ CD11c+": ["CD158b", "CD11c"],
    "CD8 T CD49f+": ["CD49f", "CD226", "CD45RO"],  # only CD45RO+
    "CD8 T CD57+ CD45RO+": ["CD57", "CD45RO"],  # CD56-
    "CD8 T CD57+ CD45RA+": ["CD57", "CD56", "CD45RA"],
    "CD8 T TIGIT+ CD45RO+": ["TIGIT", "CD45RO"],  # also CD226-
    "CD8 T TIGIT+ CD45RA+": ["TIGIT", "CD45RA"],  #
    "CD8 T CD69+ CD45RO+": ["CD69", "CD45RO"],  # also CD226-
    "CD8 T CD69+ CD45RA+": ["CD69", "CD45RA"],
}

## ATAC

In [None]:
marker_genes = {
    "CD14+ Mono": ["TREM1", "FPR1"],
    "CD16+ Mono": ["LYN", "TCF7L2"],
    "CD14+ Mono prog": ["KLF4"],
    "cDC2": ["TREM1", "DMXL2"],  # Note: DMXL2 should be negative
    "Reticulocyte": ["GATA1", "HBA1", "SLC4A1", "SLC25A37", "KLF1"],
    "Erythroblast": ["GATA1", "MKI67"],
    "Proerythroblast": ["GATA2", "SYNGR1"],
    "NK": ["CD160", "GNLY", "GRIK4", "CD247"],
    "ILC": ["SPON2", "GZMB", "PRF1", "CD69"],  # Note CD69 should be negative
    "Lymph prog": ["MME", "IGLL1", "VPREB1", "CD79B"],
    "Naive CD20+ B": ["MS4A1", "LYN", "CD74", "BLK"],
    "B1 B": ["ITGB1", "MS4A1", "DUSP22", "EPHA4"],
    "Transitional B": ["MME", "CD38", "CD24"],
    "Plasma cells": ["DUSP22", "FNDC3B"],
    "CD4+ T activated": ["JAKMIP1", "C4orf50"],
    "CD4+ T naive": [
        "BCL11B",
        "BACH2",
        "JAKMIP1",
        "C4orf50",
    ],  # Note JAKMIP1 and C4orf50 are negative markers
    "CD8+ T": ["CD8A", "CD8B"],
    "pDCs": ["PTPRS", "TCF4", "KCNN3", "DUSP22"],
    "HSPC/Mono prog": ["MPO", "NT5DC3"],
    "HSC": ["PRDM16", "MTRNR2L1"],
    "MK/E prog": ["DNAJC6"],
}