## Result 5: MOD markers in scRNA-seq data
We investigate the expression of mature oligodendrocyte (MOD) marker genes in both spatial transcriptomics (ST) and scRNA-seq data:  
- in mature oligodendrocytes  
- in non-oligodendrocytes  

The aim is to assess:
- whether their expression patterns in scRNA-seq are consistent with those observed in ST data  
- whether these markers are also expressed in other cell types  

In [None]:
from pathlib import Path

import sys
import os
sys.path.append(os.path.abspath("../src"))

from utils import load_merfish_signal_df, load_banksy_result, load_merfish_data, load_scRNA_data
from plot import plot_annotate_heatmap

import pandas as pd

### Data

#### Signals in the Tissue Section

In [None]:
# Folder containing the MERFISH input files used below (relative path).
MERFISH_data_folder_path = Path("../data/mouse_hypothalamus/MERFISH/")

In [None]:
# Load the example MERFISH barcode table. The loader returns three values;
# the last two are passed on to the BANKSY and segmentation loaders later on.
(
    signal_coordinate_df,
    coordinate_x_m,
    coordinate_y_m,
) = load_merfish_signal_df(
    MERFISH_data_folder_path / "merfish_barcodes_example.csv"
)

#### Results of BANKSY

In [None]:
# Folder containing the BANKSY clustering output read below (relative path).
banksy_folder_path = Path("../data/banksy_results/")

In [None]:
# Read the BANKSY cluster assignments, handing the loader the two coordinate
# values obtained from the MERFISH signal table.
banksy_result = load_banksy_result(
    banksy_folder_path / "banksy_cluster.txt",
    coordinate_x_m,
    coordinate_y_m,
)

#### Segmentation Dataset

In [None]:
# Load the segmented MERFISH cells for animal 1 at bregma -0.24, together with
# the BANKSY result (a 'banksy' column is read from this table further down).
merfish_data = load_merfish_data(
    MERFISH_data_folder_path / "merfish_all_cells.csv",
    banksy_result,
    coordinate_x_m,
    coordinate_y_m,
    animal_id=1,
    bregma_value=-0.24,
)

#### Matched Single-Cell RNA Sequencing

In [None]:
# Locations of the scRNA-seq inputs: count matrix, barcodes, genes and the
# metadata spreadsheet, all inside one folder.
data_path = Path("../data/mouse_hypothalamus/SingleCell/")
mtx_path = data_path.joinpath("GSE113576_matrix.mtx")
barcodes_path = data_path.joinpath("GSE113576_barcodes.tsv")
genes_path = data_path.joinpath("GSE113576_genes.tsv")
meta_path = data_path.joinpath("aau5324_Moffitt_Table-S1.xlsx")

In [None]:
# Mapping from the cell-class names to the labels used in this notebook.
# Only the three oligodendrocyte classes are renamed (to an "OD ..." label);
# every other class maps to itself. Insertion order is kept as originally written.
cell_class_dict = {
    **{
        name: name
        for name in (
            "Astrocytes",
            "Microglia",
            "Macrophage",
            "Excitatory",
            "Inhibitory",
        )
    },
    "Immature oligodendrocyte": "OD immature",
    "Newly formed oligodendrocyte": "OD newly formed",
    "Mature oligodendrocyte": "OD mature",
    **{
        name: name
        for name in (
            "Ependymal",
            "Endothelial",
            "Mural",
            "Fibroblast",
        )
    },
}

# Load the scRNA-seq dataset from the matrix/barcode/gene/metadata files,
# passing the cell-class mapping as the loader's `cell_class_filter`.
adata = load_scRNA_data(
    mtx_path, barcodes_path, genes_path, meta_path,
    cell_class_filter=cell_class_dict,
)

#### Marker Genes

Differentially expressed (DE) genes identified by BANKSY:

In [None]:
# DE genes of MOD1. The original note tags this list with "8" -- presumably the
# BANKSY cluster id (8 is renamed to 'MOD1' further down), not a gene count.
DE_genes_MOD1 = ['Mbp', 'Lpar1', 'Trh', 'Ucn3', 'Cck']

# DE genes of MOD2. Tagged with "7" in the original note -- presumably the
# BANKSY cluster id (7 is renamed to 'MOD2' further down), not a gene count.
DE_genes_MOD2 = [
    'Mlc1', 'Dgkk', 'Cbln2', 'Syt4', 'Gad1', 'Plin3', 'Gnrh1', 'Sln', 'Gjc3',
]

# All differentially expressed genes: the MOD1 genes followed by the MOD2 genes.
DE_genes = DE_genes_MOD1 + DE_genes_MOD2

In [None]:
# Expression table of the scRNA-seq data as a DataFrame.
sc_data = adata.to_df()

# Marker-gene columns for each MOD group.
sc_DE_MOD1_df = sc_data.loc[:, DE_genes_MOD1]
sc_DE_MOD2_df = sc_data.loc[:, DE_genes_MOD2]

# Combined marker table: MOD1 columns first, MOD2 columns after.
sc_DE = pd.concat([sc_DE_MOD1_df, sc_DE_MOD2_df], axis="columns")

# Per-cell class annotation taken from the observation metadata.
sc_cell_class = adata.obs["Cell_class"]

In [None]:
# MERFISH cells assigned to BANKSY cluster 7 or 8, ordered by cluster label.
MOD_merfish = (
    merfish_data[merfish_data["banksy"].isin([7, 8])]
    .sort_values(by="banksy")
)

# Keep only the DE genes that exist as MERFISH columns (DE_genes order is kept),
# then flip the table so that genes become rows.
common_genes = [name for name in DE_genes if name in MOD_merfish.columns]
MOD_merfish_DE = MOD_merfish[common_genes].transpose()

# Cluster label of every selected cell, in the sorted order.
MOD_banksy = MOD_merfish["banksy"]

In [None]:
# Group id per marker gene for the heatmap annotation: nine genes in group 0
# followed by five genes in group 1, stored in a single 'cluster' column.
# NOTE(review): the 9 + 5 layout matches the MOD2-then-MOD1 column order of the
# non-OD MERFISH table; the other tables are built MOD1-then-MOD2 (5 + 9).
# Confirm how plot_annotate_heatmap pairs these ids with the gene columns.
Gene_Group = pd.DataFrame({"cluster": [0] * 9 + [1] * 5})

### Heatmaps

#### scRNA-seq: Marker Genes in MOD

In [None]:
# Attach the cell class to the marker table and keep only the cells whose
# class label starts with "OD mat".
MOD_sc_data = pd.concat([sc_DE, sc_cell_class], axis="columns").loc[
    lambda frame: frame["Cell_class"].str.startswith("OD mat")
]

# Split the labels from the expression values again.
MOD_sc_cell_class = MOD_sc_data["Cell_class"]
MOD_sc_data = MOD_sc_data.drop(columns="Cell_class")

In [None]:
# Highlight boxes passed to the heatmap: one dict per box holding its
# x offset, width and colour (values are hard-coded for this plot).
box_specs = [
    {"x_offset": x_offset, "width": width, "color": color}
    for x_offset, width, color in (
        (500, 350, "green"),
        (1350, 250, "orange"),
        (2620, 200, "blue"),
        (4800, 1800, "red"),
    )
]

In [None]:
# Heatmap of the marker genes in the selected scRNA-seq cells, with the
# hard-coded highlight boxes drawn on top.
plot_annotate_heatmap(
    cluster_data=MOD_sc_data,
    cluster_labels=MOD_sc_cell_class,
    gene_groups=Gene_Group,
    show_cluster=False,
    box_specs=box_specs,
)

#### scRNA-seq: Marker Genes in non-OD Cell Types

In [None]:
# Attach the cell class to the marker table and discard every cell whose
# class label starts with "OD".
noOD_sc_data = pd.concat([sc_DE, sc_cell_class], axis="columns").loc[
    lambda frame: ~frame["Cell_class"].str.startswith("OD")
]

# Split the labels from the expression values again.
noOD_sc_cell_class = noOD_sc_data["Cell_class"]
noOD_sc_data = noOD_sc_data.drop(columns="Cell_class")

In [None]:
# Heatmap of the marker genes in the scRNA-seq cells outside the "OD" classes.
plot_annotate_heatmap(
    cluster_data=noOD_sc_data,
    cluster_labels=noOD_sc_cell_class,
    gene_groups=Gene_Group,
    cluster_text_y=-1.3,
)

#### MERFISH: Marker Genes in MOD

In [None]:
# Swap the numeric BANKSY labels for readable names: 7 -> MOD2, 8 -> MOD1.
MOD_banksy = MOD_banksy.replace(
    to_replace={7: "MOD2", 8: "MOD1"},
)

In [None]:
# Heatmap of the marker genes in the MERFISH cells of the two MOD clusters.
# The gene-by-cell table is transposed back so that cells are rows.
plot_annotate_heatmap(
    cluster_data=MOD_merfish_DE.T,
    cluster_labels=MOD_banksy,
    gene_groups=Gene_Group,
    cluster_text_y=-0.9,
)

#### MERFISH: Marker Genes in non-OD Cell Types

In [None]:
# Flag the MERFISH cells whose class label starts with "OD" and drop them
# by index label.
OD_mask = merfish_data["Cell_class"].str.startswith("OD")
no_OD_merfish_data = merfish_data.drop(index=merfish_data.index[OD_mask])

# DE genes available as MERFISH columns, per group (list order is kept).
common_genes_gm = [name for name in DE_genes_MOD2 if name in no_OD_merfish_data.columns]
common_genes_wm = [name for name in DE_genes_MOD1 if name in no_OD_merfish_data.columns]

no_OD_merfish_DE_gm = no_OD_merfish_data.loc[:, common_genes_gm]
no_OD_merfish_DE_wm = no_OD_merfish_data.loc[:, common_genes_wm]

# MOD2 columns first, then MOD1; transposed so that genes become rows.
no_OD_merfish_DE = pd.concat(
    [no_OD_merfish_DE_gm, no_OD_merfish_DE_wm],
    axis="columns",
).transpose()

# Class label of every remaining cell.
no_OD_cell_class = no_OD_merfish_data["Cell_class"]

In [None]:
# Heatmap of the marker genes in the MERFISH cells outside the "OD" classes.
# The gene-by-cell table is transposed back so that cells are rows.
plot_annotate_heatmap(
    cluster_data=no_OD_merfish_DE.T,
    cluster_labels=no_OD_cell_class,
    gene_groups=Gene_Group,
)