## Result 2: VSI
Vertical signal integrity (VSI) was assessed using `ovrl.py` (see `Ovrlpy_all_transcripts.ipynb`).  

This notebook focuses on the VSI map at Bregma = -0.24 and VSI differences between MOD1 and MOD2.

### data

In [None]:
import sys
import os

sys.path.append(os.path.abspath("../src"))

from pathlib import Path

from utils import extract_cell_vsi
from plot import plot_VSI_map, plot_vsi_with_named_squares, plot_vsi_distribution_comparison, plot_normalized_histogram, plot_vsi_qqplot

from IPython.display import display
import numpy as np
import pandas as pd

#### Signals in the Tissue Section

In [None]:
# Folder holding the MERFISH input tables, relative to this notebook.
MERFISH_data_folder_path = Path("..") / "data" / "mouse_hypothalamus" / "MERFISH"

In [None]:
# Columns read from the barcode table; the coordinate and gene columns are
# renamed to short names right after loading.
columns = [
    "Centroid_X",
    "Centroid_Y",
    "Centroid_Z",
    "Gene_name",
    "Cell_name",
    "Total_brightness",
    "Area",
    "Error_bit",
    "Error_direction",
]

signal_coordinate_df = pd.read_csv(
    MERFISH_data_folder_path / "merfish_barcodes_example.csv", usecols=columns
).rename(
    columns={
        "Centroid_X": "x",
        "Centroid_Y": "y",
        "Centroid_Z": "z",
        "Gene_name": "gene",
    }
)

# Remove dummy molecules (gene names containing "Blank" or "NegControl").
# The filtered frame is copied immediately so that the column assignments below
# write to an independent DataFrame instead of a slice of the loaded table;
# copying only at the very end is too late to avoid SettingWithCopyWarning.
signal_coordinate_df = signal_coordinate_df.loc[
    ~signal_coordinate_df["gene"].str.contains("Blank|NegControl"),
].copy()

signal_coordinate_df["gene"] = signal_coordinate_df["gene"].astype("category")

# Shift the coordinates so the minimum x and y become 0 (no negative values).
# The offsets are kept as notebook-level variables because the BANKSY,
# segmentation and boundary cells subtract the same values.
coordinate_x_m = signal_coordinate_df["x"].min()
coordinate_y_m = signal_coordinate_df["y"].min()
signal_coordinate_df["x"] = signal_coordinate_df["x"] - coordinate_x_m
signal_coordinate_df["y"] = signal_coordinate_df["y"] - coordinate_y_m

#### Results of Ovrlpy

results:  
- signal integrity  
- signal strength  

In [None]:
# Folder with the saved signal-integrity / signal-strength text files.
ovrlpy_result_folder = Path("..") / "data" / "results" / "VSI"

In [None]:
# Load both result arrays from their whitespace-delimited text files.
signal_integrity, signal_strength = (
    np.loadtxt(ovrlpy_result_folder / result_file)
    for result_file in ("SignalIntegrity.txt", "SignalStrength.txt")
)

#### Results of BANKSY

In [None]:
# Folder with the BANKSY clustering output.
banksy_folder_path = Path("..") / "data" / "banksy_results"

In [None]:
# Columns read from the tab-separated BANKSY cluster table.
columns = [
    "Centroid_X",
    "Centroid_Y",
    "Bregma",
    "lam0.2",
]

banksy_result = pd.read_csv(
    banksy_folder_path / "banksy_cluster.txt", usecols=columns, sep="\t"
).rename(
    columns={
        "Centroid_X": "x",
        "Centroid_Y": "y",
        "lam0.2": "banksy_cluster",
    }
)

# Keep only the Bregma = -0.24 section. The slice is copied right away so the
# coordinate assignments below modify an independent frame (copying afterwards
# does not prevent SettingWithCopyWarning).
banksy_result = banksy_result[banksy_result["Bregma"] == -0.24].copy()

# Apply the same offsets that were subtracted from the signal coordinates.
banksy_result["x"] = banksy_result["x"] - coordinate_x_m
banksy_result["y"] = banksy_result["y"] - coordinate_y_m

#### Segmentation Dataset

In [None]:
merfish_data = pd.read_csv(
    MERFISH_data_folder_path / "merfish_all_cells.csv"
).rename(
    columns={
        "Centroid_X": "x",
        "Centroid_Y": "y",
    }
)

# Drop the Fos column and every column whose name starts with "Blank_".
merfish_data = merfish_data.drop(
    columns=[
        col
        for col in merfish_data.columns
        if col == "Fos" or col.startswith("Blank_")
    ]
)

# Keep non-ambiguous cells of animal 1 in the Bregma = -0.24 section.
# The three filters are combined into one mask and the result is copied once,
# so the assignments below act on an independent frame rather than a slice
# (the original copied only after assigning, which is too late to avoid
# SettingWithCopyWarning).
selected_cells = (
    (merfish_data["Cell_class"] != "Ambiguous")
    & (merfish_data["Animal_ID"] == 1)
    & (merfish_data["Bregma"] == -0.24)
)
merfish_data = merfish_data.loc[selected_cells].copy()

# Apply the same offsets that were subtracted from the signal coordinates.
merfish_data["x"] = merfish_data["x"] - coordinate_x_m
merfish_data["y"] = merfish_data["y"] - coordinate_y_m

# BANKSY labels are attached by row position, not by a key.
# NOTE(review): this assumes banksy_result lists exactly these cells in the
# same order -- confirm against how banksy_cluster.txt was produced.
if len(banksy_result) != len(merfish_data):
    raise ValueError(
        "Cannot attach BANKSY clusters by position: "
        f"banksy_result has {len(banksy_result)} rows but merfish_data has "
        f"{len(merfish_data)} rows after filtering."
    )
merfish_data["banksy"] = banksy_result["banksy_cluster"].values

In [None]:
# Collapse numbered sub-classes (e.g. "Endothelial 1") into their parent class.
cell_class_m = {
    "Astrocyte": "Astrocyte",
    "Endothelial 1": "Endothelial",
    "Endothelial 2": "Endothelial",
    "Endothelial 3": "Endothelial",
    "Ependymal": "Ependymal",
    "Excitatory": "Excitatory",
    "Inhibitory": "Inhibitory",
    "Microglia": "Microglia",
    "OD Immature 1": "OD Immature",
    "OD Immature 2": "OD Immature",
    "OD Mature 1": "OD Mature",
    "OD Mature 2": "OD Mature",
    "OD Mature 3": "OD Mature",
    "OD Mature 4": "OD Mature",
    "Pericytes": "Pericytes",
}

# Labels that are not keys of the mapping are kept as they are. A plain
# `.map(cell_class_m)` turns them into NaN, which silently wiped the merged
# labels ("Endothelial", "OD Immature", "OD Mature") whenever this cell was
# executed a second time on the already-mapped column.
merfish_data["Cell_class"] = merfish_data["Cell_class"].map(
    lambda label: cell_class_m.get(label, label)
)
merfish_data = merfish_data.sort_values(by="Cell_class")

merfish_data = merfish_data.copy()

In [None]:
# Original cell types, encoded as integers for the Bregma = -0.24 section.
# The selection is copied so that recoding Cell_class below neither warns
# (SettingWithCopyWarning) nor risks touching merfish_data, which keeps the
# string labels.
merfish_024 = merfish_data[merfish_data["Bregma"] == -0.24].copy()

# Integer code assigned to each merged cell class.
class_024 = {
    "Astrocyte": 1,
    "Endothelial": 2,
    "Ependymal": 3,
    "Excitatory": 4,
    "Inhibitory": 5,
    "Microglia": 6,
    "OD Immature": 7,
    "OD Mature": 8,
    "Pericytes": 9,
}

merfish_024["Cell_class"] = merfish_024["Cell_class"].map(class_024)

#### Cell Boundaries Dataset

In [None]:
# Load the cell boundary table and discard rows lacking either coordinate string.
boundaries_df = (
    pd.read_csv(MERFISH_data_folder_path / "cellboundaries_example_animal.csv")
    .dropna(subset=["boundaryX", "boundaryY"])
)

In [None]:
def _parse_and_shift(boundary, offset):
    """Turn a ';'-separated coordinate string into a list of floats and subtract `offset`.

    Lists are shifted as they are; any other value is returned unchanged.
    """
    if isinstance(boundary, str):
        boundary = [float(value) for value in boundary.split(";")]
    if isinstance(boundary, list):
        boundary = [value - offset for value in boundary]
    return boundary


# Restrict the boundaries to the retained cells and attach each cell's
# centroid (x, y) and BANKSY cluster.
cell_ids = merfish_data["Cell_ID"]
boundaries_df = (
    boundaries_df[boundaries_df["feature_uID"].isin(cell_ids)]
    .merge(
        merfish_data[["Cell_ID", "x", "y", "banksy"]],
        left_on="feature_uID",
        right_on="Cell_ID",
        how="inner",
    )
    .drop(columns=["Cell_ID"])
)

# Parse the boundary strings and move them into the shifted coordinate frame.
for boundary_column, axis_offset in (
    ("boundaryX", coordinate_x_m),
    ("boundaryY", coordinate_y_m),
):
    boundaries_df[boundary_column] = boundaries_df[boundary_column].apply(
        _parse_and_shift, args=(axis_offset,)
    )

boundaries_df = boundaries_df.copy()

#### MOD
BANKSY cluster:  
- 7: MMOD2 / MOD2  
- 8: MMOD1 / MOD1

In [None]:
# Cell boundaries split by BANKSY cluster: 8 -> MOD1, 7 -> MOD2.
MOD1_boundaries = boundaries_df.loc[boundaries_df["banksy"].eq(8)]
MOD2_boundaries = boundaries_df.loc[boundaries_df["banksy"].eq(7)]

### VSI across the slice
Main findings:  
- Lower VSI at cell borders
- May cause false cell identification

#### overall

In [None]:
# VSI map built from the full signal-integrity / signal-strength arrays,
# using side_display="hist" and no boundaries or centroids.
overall_hist_options = {
    "signal_threshold": 3.0,
    "figure_height": 10,
    "cmap": "BIH",
    "side_display": "hist",
    "plot_boundarys": False,
    "plot_centroid": False,
}

SIM = plot_VSI_map(
    cell_integrity=signal_integrity,
    cell_strength=signal_strength,
    **overall_hist_options,
)

display(SIM)

In [None]:
# Same map without cell boundaries, this time with side_display="colorbar".
overall_colorbar_options = {
    "signal_threshold": 3.0,
    "figure_height": 10,
    "cmap": "BIH",
    "side_display": "colorbar",
    "plot_boundarys": False,
    "plot_centroid": False,
}

SIM_noboundaries = plot_VSI_map(
    cell_integrity=signal_integrity,
    cell_strength=signal_strength,
    **overall_colorbar_options,
)

display(SIM_noboundaries)

#### cell level

In [None]:
# Restrict the integrity / strength maps to the segmented cells.
# NOTE(review): integrity_size=1800 is passed explicitly here only -- confirm
# it matches the size of the loaded signal-integrity array.
cell_vsi = extract_cell_vsi(
    boundary_df=boundaries_df,
    integrity=signal_integrity,
    strength=signal_strength,
    integrity_size=1800,
)
cell_integrity, cell_strength = cell_vsi

In [None]:
# Cell-level VSI map without boundary outlines, using side_display="hist".
cell_map_options = {
    "signal_threshold": 3.0,
    "figure_height": 10,
    "cmap": "BIH",
    "side_display": "hist",
    "plot_boundarys": False,
    "plot_centroid": False,
}

cell_SIM_nb = plot_VSI_map(
    cell_integrity=cell_integrity,
    cell_strength=cell_strength,
    **cell_map_options,
)

display(cell_SIM_nb)

In [None]:
# Cell-level VSI map with the cell boundaries drawn in yellow and no side panel.
# The figure is not displayed here; the region cells zoom into it and show it.
cell_boundary_options = {
    "boundary_df": boundaries_df,
    "plot_boundarys": True,
    "boundary_color": "yellow",
    "boundary_width": 1.5,
    "plot_centroid": False,
}

cell_SIM = plot_VSI_map(
    cell_integrity=cell_integrity,
    cell_strength=cell_strength,
    signal_threshold=3.0,
    figure_height=10,
    cmap="BIH",
    side_display=None,
    **cell_boundary_options,
)

#### ROI for findings
ROIs showing low VSI at cell borders and possible false cell identification.

In [None]:
# (x, y) anchors of the four regions of interest; all of them are unnamed.
roi_anchors = [(400, 1190), (1400, 250), (50, 440), (1350, 950)]
regions = [
    {"x": anchor_x, "y": anchor_y, "name": None}
    for anchor_x, anchor_y in roi_anchors
]

In [None]:
# Cell-level VSI map with the regions of interest marked as squares.
VSI_ROI = plot_vsi_with_named_squares(
    cell_integrity,
    cell_strength,
    named_squares=regions,
)

display(VSI_ROI)

In [None]:
# region 1: zoom the boundary-overlaid map to x 400-500, y 1190-1290
roi_x, roi_y, roi_size = 400, 1190, 100

# The first axes of the figure is the heatmap; narrow its limits to the ROI.
ax = cell_SIM.axes[0]
ax.set_ylim(roi_y, roi_y + roi_size)
ax.set_xlim(roi_x, roi_x + roi_size)

cell_SIM

In [None]:
# region 2: zoom the boundary-overlaid map to x 1400-1500, y 250-350
roi_x, roi_y, roi_size = 1400, 250, 100

# The first axes of the figure is the heatmap; narrow its limits to the ROI.
ax = cell_SIM.axes[0]
ax.set_ylim(roi_y, roi_y + roi_size)
ax.set_xlim(roi_x, roi_x + roi_size)

cell_SIM

In [None]:
# region 3: zoom the boundary-overlaid map to x 50-150, y 440-540
roi_x, roi_y, roi_size = 50, 440, 100

# The first axes of the figure is the heatmap; narrow its limits to the ROI.
ax = cell_SIM.axes[0]
ax.set_ylim(roi_y, roi_y + roi_size)
ax.set_xlim(roi_x, roi_x + roi_size)

cell_SIM

In [None]:
# region 4: zoom the boundary-overlaid map to x 1350-1450, y 950-1050
roi_x, roi_y, roi_size = 1350, 950, 100

# Narrow the limits of the figure's first axes to the ROI.
ax = cell_SIM.axes[0]
ax.set_ylim(roi_y, roi_y + roi_size)
ax.set_xlim(roi_x, roi_x + roi_size)

cell_SIM

### VSI in MODs
Compare the VSI values in MOD1 and MOD2:  
- MOD2 shows lower VSI

#### MOD1 vs. MOD2

In [None]:
# Extract the integrity / strength values separately for the MOD1 and MOD2 cells.
# NOTE(review): integrity_size is left at the helper's default here, whereas the
# whole-slice extraction passes 1800 -- confirm the default matches.
(MOD1_int, MOD1_str), (MOD2_int, MOD2_str) = (
    extract_cell_vsi(mod_boundaries, signal_integrity, signal_strength)
    for mod_boundaries in (MOD1_boundaries, MOD2_boundaries)
)

In [None]:
# Compare the VSI distributions of MOD1 (set 1) and MOD2 (set 2); the returned
# values and bins are reused by the histogram and Q-Q plot cells.
comparison_options = {
    "signal_threshold": 3.0,
    "figure_height": 7,
    "cmap": "BIH",
}

vals1, bins1, vals2, bins2 = plot_vsi_distribution_comparison(
    cell_integrity_1=MOD1_int,
    cell_strength_1=MOD1_str,
    cell_integrity_2=MOD2_int,
    cell_strength_2=MOD2_str,
    **comparison_options,
)

In [None]:
# Normalized histogram of both value sets on the MOD1 bins.
# NOTE(review): only bins1 is passed -- presumably bins1 and bins2 are identical; confirm.
plot_normalized_histogram(
    vals1,
    vals2,
    bins=bins1,
    epsilon=1e-10,
)

In [None]:
# Q-Q plot comparing the MOD1 and MOD2 VSI distributions.
plot_vsi_qqplot(
    vals1,
    bins1,
    vals2,
    bins2,
)