# Analyze macrophages in segmented cell nuclei

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import anndata
import geopandas as gpd
import scanpy as sc

import pathlib

from tifffile import imread, imwrite
from csbdeep.utils import normalize

from scipy import sparse
from matplotlib.colors import ListedColormap

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import tme_colormaps

In [None]:
import nuclei_segmentation_plotting as nsp

In [None]:
import sys
# Make modules stored under the shared source folder importable.
# NOTE(review): `tme_colormaps` and `nuclei_segmentation_plotting` are imported
# in earlier cells, i.e. before this path is appended. If either module lives
# in '../../shared/src', a fresh kernel would fail on those imports — confirm
# the intended cell order.
sys.path.append('../../shared/src')

In [None]:
# Matplotlib overrides for figure export: no LaTeX text rendering, and
# svg.fonttype 'none' (per the matplotlib docs this keeps SVG text as text
# instead of converting it to paths).
new_rc_params = {
    'text.usetex': False,
    "svg.fonttype": 'none',
}
for _rc_key, _rc_value in new_rc_params.items():
    mpl.rcParams[_rc_key] = _rc_value

In [None]:
from importlib import reload
# Re-execute the two project helper modules so that changes made to their
# source files are picked up without restarting the kernel.
reload(nsp)
reload(tme_colormaps)

# Import data

In [None]:
# TODO: use data links from GEO

In [None]:
# Sample identifier; used below to name the results folder and the exported CSV.
sample_name = "mouse_tumor_361481_2-B23_1556_SM0296"

In [None]:
# Project folders, all relative to the notebook's working directory.
path_to_data = pathlib.Path('..', 'data')
path_to_intermediate_data = pathlib.Path('..', 'data', 'intermediate')
path_to_results = pathlib.Path('..', 'results', sample_name)

# Create the folders that are written to; existing folders are left as they are.
for _folder in (path_to_intermediate_data, path_to_results):
    _folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Scanpy session configuration.
sc.settings.verbosity = 3  # 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.logging.print_header()
sc.settings.set_figure_params(
    dpi=100,
    dpi_save=300,
    facecolor='white',
    figsize=(4, 4),
    frameon=False,
    vector_friendly=False,
)
# Figures saved through scanpy's `save=` argument land in the sample's results folder.
sc.settings.figdir = path_to_results

# Shared styling constants for the sc.pl.spatial calls further down.
spot_size = 100
spatial_dot_size = 2.4
spatial_transparency = 1

In [None]:
# Load the AnnData object stored in the intermediate-data folder
# (count/area filtered, judging by the file name).
_adata_file = path_to_intermediate_data / 'count_area_filtered_adata.h5ad'
adata = sc.read(_adata_file)

In [None]:
# Load the raw image and apply percentile normalization.
# NOTE(review): `img` is not consumed by any cell visible here (every plotting
# call passes img=None / img_key=None) — confirm whether this load is needed.
img_filename = 'B23.1556.tif'

# Percentile bounds; adjust as needed for other images.
min_percentile = 2
max_percentile = 98  # 95 was too low

_raw_img = imread(path_to_data / 'raw' / img_filename)
img = normalize(_raw_img, min_percentile, max_percentile)

In [None]:
# Display the intermediate-data folder path as a quick sanity check.
path_to_intermediate_data

In [None]:
# Load the GeoDataFrame with the segmentation geometries
# (presumably one polygon per nucleus — TODO confirm against the upstream step).
_gdf_file = path_to_intermediate_data / 'geodataframe.parquet'
gdf = gpd.read_parquet(_gdf_file)

In [None]:
# Display the loaded GeoDataFrame for inspection.
gdf

In [None]:
# Display the `id` column of the observation table.
adata.obs.id

In [None]:
# Display the dimensions of the expression matrix (observations x variables).
adata.shape

In [None]:
# Marker genes that feed the macrophage score ('mac_score') computed below.
MAC_GENES = [
    'Cd68',
    'Csf1r',
    'Adgre1',
    'Itgam',
    'Fcgr1',
]

In [None]:
# De-duplicate variable (gene) names in place so genes can be addressed by name.
adata.var_names_make_unique()

In [None]:
# Score every observation on the macrophage marker set; the result is stored
# in adata.obs['mac_score'].
sc.tl.score_genes(
    adata,
    gene_list=MAC_GENES,
    score_name='mac_score',
)

In [None]:
# Upper colour limit for the macrophage-score map: the 90th percentile of the score.
vmax = adata.obs['mac_score'].quantile(q=0.9)

In [None]:
# Draw the macrophage score on the segmentation geometries and save it as TIFF.
nsp.plot_obs_key_and_save_image(
    title='Mac score',
    gdf=gdf,
    img=None,
    adata=adata,
    obs_key='mac_score',
    output_name=path_to_results / "mac_score.tiff",
    vmin=0,
    vmax=vmax,
)

In [None]:
# Display summary statistics of the macrophage score.
adata.obs.mac_score.describe()

In [None]:
# Keep only observations with a positive macrophage score.
# Subsetting an AnnData object yields a view; the cells below write to
# ad_mac.uns / .obs / .obsm, so take an explicit copy instead of relying on
# the implicit copy-on-modification (and its warning).
ad_mac = adata[adata.obs.mac_score > 0].copy()

In [None]:
# NOTE(review): presumably a workaround for scanpy functions that look up
# uns['log1p']['base'] and fail when that entry is missing after reading an
# .h5ad file — confirm. The line itself raises KeyError if 'log1p' is not in uns.
ad_mac.uns['log1p']['base'] = None

In [None]:
# Flag the 2000 most variable genes (Seurat flavour) in the macrophage subset.
sc.pp.highly_variable_genes(
    ad_mac,
    n_top_genes=2000,
    flavor="seurat",
)
# Dimensionality reduction followed by the neighbourhood graph, both with
# scanpy's default parameters.
sc.pp.pca(ad_mac)
sc.pp.neighbors(ad_mac)

In [None]:
# Compute the UMAP embedding of the macrophage subset (default parameters).
sc.tl.umap(ad_mac)

In [None]:
# Leiden clustering; labels are stored in ad_mac.obs['leiden_0.5'].
# The resolution may need tuning for other samples.
sc.tl.leiden(
    ad_mac,
    key_added='leiden_0.5',
    resolution=0.5,
)

# Fig 2B

In [None]:
# UMAP coloured by Leiden cluster; request the Axes back (show=False) so the
# aspect ratio can be fixed before the figure is rendered.
ax = sc.pl.umap(
    ad_mac,
    color='leiden_0.5',
    palette=tme_colormaps.cluster_cmap,
    show=False,
)
ax.set_aspect('equal')
plt.show()

In [None]:
# Draw the Leiden clusters on the segmentation geometries, reusing the cluster
# colours stored in ad_mac.uns['leiden_0.5_colors'].
nsp.plot_clusters_and_save_image(
    'leiden_0.5',
    gdf=gdf,
    img=None,
    adata=ad_mac,
    color_by_obs='leiden_0.5',
    color_list=ad_mac.uns['leiden_0.5_colors'],
)

In [None]:
# Rank marker genes per Leiden cluster with the Wilcoxon test.
sc.tl.rank_genes_groups(
    ad_mac,
    groupby='leiden_0.5',
    method='wilcoxon',
)

In [None]:
# Plot the ranked genes for each cluster (independent y-axes) and save as SVG.
sc.pl.rank_genes_groups(
    ad_mac,
    sharey=False,
    save='_leiden0.5.svg',
)

In [None]:
# Collect the ranking results as a DataFrame (group=None, i.e. no single
# cluster is singled out).
ranked_genes = sc.get.rank_genes_groups_df(
    ad_mac,
    group=None,
)

In [None]:
# Write the ranked-gene table into the sample's results folder.
_ranked_genes_csv = path_to_results / 'ranked_genes_leiden05.csv'
ranked_genes.to_csv(_ranked_genes_csv)

In [None]:
# Regions of interest, each a 4-tuple handed to the plotting helper as `bbox`.
# NOTE(review): the coordinate order (e.g. xmin, ymin, xmax, ymax) is defined
# by the helper — confirm there.
rois = {
    'top_left': (11591, 8072, 17200, 12176),
    'bottom_right': (23288, 15802, 27803, 19290),
    'bottom_right_enlarged': (22696, 15767, 27742, 19310),
    'top_right': (21373, 8345, 26435, 12244),
    'middle': (14822, 13042, 20042, 16829),
}

In [None]:
# One cluster map per region of interest, each saved as its own TIFF.
for name, coord in rois.items():
    nsp.plot_clusters_and_save_image(
        title='{}: leiden_0.5'.format(name),
        gdf=gdf,
        img=None,
        adata=ad_mac,
        bbox=coord,
        color_by_obs='leiden_0.5',
        output_name=path_to_results / 'leiden_05_{}.tiff'.format(name),
        color_list=ad_mac.uns['leiden_0.5_colors'],
    )

# Convert polygons to dots for easier plotting

In [None]:
# Store the centroid point of every geometry in a new 'centroid' column.
gdf['centroid'] = gdf.geometry.centroid

In [None]:
# Split the stored centroid points into plain x / y columns.
# Attribute access `gdf.centroid` resolves to GeoDataFrame's `centroid`
# property (which recomputes the centroids of the active geometry on every
# access), not to the 'centroid' column created above — so read the column
# explicitly and only once.
_centroid_points = gdf['centroid']
gdf['x'] = _centroid_points.x
gdf['y'] = _centroid_points.y

In [None]:
import seaborn as sns

In [None]:
# Display the observation names of the macrophage subset (used below to look
# up the matching rows of the coordinate table).
ad_mac.obs_names

In [None]:
# Extract the identifier and the centroid coordinates into a separate table.
coords = gdf[['id', 'x', 'y']]

In [None]:
# Index the coordinate table by 'id' (the column itself is dropped) so rows
# can be looked up by observation name.
coords = coords.set_index('id')

In [None]:
# Display the coordinate table.
coords

In [None]:
# Display the dimensions of the macrophage subset.
ad_mac.shape

In [None]:
# Restrict and reorder the coordinates to the observations in ad_mac.
# Label-based lookup: an observation name missing from the index raises KeyError.
# NOTE(review): assumes the gdf 'id' values and ad_mac.obs_names share the same
# type and format — confirm.
coords = coords.loc[ad_mac.obs_names]

In [None]:
# Attach the aligned centroid coordinates to the AnnData object; the later
# sc.pl.spatial calls presumably read them from this obsm entry.
ad_mac.obsm['X_spatial'] = coords.to_numpy()

In [None]:
# Display the multi-dimensional observation annotations now stored on ad_mac.
ad_mac.obsm

In [None]:
# Display the stored coordinate array.
ad_mac.obsm['X_spatial']

In [None]:
# Genes whose expression is drawn on the spatial plots below.
gene_list = [
    'Spp1',
    'Cxcl9',
    'Cd74',
    'H2-Ab1',
    'Hmox1',
]

In [None]:
# One spatial expression map per gene; the colour scale is capped at the
# 99th percentile ('p99').
for gene in gene_list:
    sc.pl.spatial(
        ad_mac,
        color=gene,
        spot_size=spot_size,
        size=spatial_dot_size,
        vmax='p99',
        img=None,
        cmap=tme_colormaps.gene_expression,
    )

# Co-expression plots (Spp1, Cxcl9, Cd74)

In [None]:
# Per-observation expression values of the genes used for the co-expression labels.
_coexpression_genes = ['Spp1', 'Cxcl9', 'Cd74', 'H2-Ab1']
df = sc.get.obs_df(ad_mac, keys=_coexpression_genes)

In [None]:
# Label every observation by Cxcl9 / Spp1 co-expression
# (a value > 0 counts as expressed).
conditions = [
    (df['Cxcl9'] > 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] > 0) & (df['Spp1'] == 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] == 0),
]

# Labels, in the same order as `conditions`.
results = ['Cxcl9 & Spp1', 'Cxcl9', 'Spp1', 'nothing']

# Pass an explicit string default: np.select otherwise falls back to the
# integer 0, which newer NumPy releases refuse to combine with string choices
# (TypeError) and older ones turn into a stray '0' label. Rows matching none of
# the conditions (e.g. NaN or negative values) are labelled 'nothing'.
ad_mac.obs['Cxcl9_Spp1'] = np.select(conditions, results, default='nothing')

In [None]:
# Label every observation by Cd74 / Spp1 co-expression
# (a value > 0 counts as expressed).
conditions = [
    (df['Cd74'] > 0) & (df['Spp1'] > 0),
    (df['Cd74'] > 0) & (df['Spp1'] == 0),
    (df['Cd74'] == 0) & (df['Spp1'] > 0),
    (df['Cd74'] == 0) & (df['Spp1'] == 0),
]

# Labels, in the same order as `conditions`.
results = ['Cd74 & Spp1', 'Cd74', 'Spp1', 'nothing']

# Pass an explicit string default: np.select otherwise falls back to the
# integer 0, which newer NumPy releases refuse to combine with string choices
# (TypeError) and older ones turn into a stray '0' label. Rows matching none of
# the conditions (e.g. NaN or negative values) are labelled 'nothing'.
ad_mac.obs['Cd74_Spp1'] = np.select(conditions, results, default='nothing')

# Colour per label. The 'Cd74' entry was an incomplete expression
# (`tme_colormaps.,`, a syntax error); it now uses 'Yellow', matching the
# identical dictionary defined later in this notebook.
# NOTE(review): the plotting cells visible here pass
# tme_colormaps.set_of_three_plus_bg rather than this dictionary.
Cd74_Spp1_colors = {
    'Cd74': 'Yellow',
    'Spp1': 'Cyan',
    'Cd74 & Spp1': 'Magenta',
    'nothing': 'lightgrey',
}

In [None]:
# Spatial map of the Cd74 / Spp1 co-expression labels, saved as PNG.
sc.pl.spatial(
    ad_mac,
    color='Cd74_Spp1',
    img_key=None,
    spot_size=spot_size,
    size=spatial_dot_size,
    alpha=0.7,
    palette=tme_colormaps.set_of_three_plus_bg,
    save='_Cd74_Spp1_coexpression.png',
)

In [None]:
# Spatial map of the Cxcl9 / Spp1 co-expression labels, saved as PNG.
sc.pl.spatial(
    ad_mac,
    color='Cxcl9_Spp1',
    img_key=None,
    spot_size=spot_size,
    size=spatial_dot_size,
    alpha=0.7,
    palette=tme_colormaps.set_of_three_plus_bg,
    save='_Cxcl9_Spp1_coexpression.png',
)

# Clustering

# Fig 2D

In [None]:
# Spatial map of the Leiden clusters (smaller dot size than the other maps).
sc.pl.spatial(
    ad_mac,
    color='leiden_0.5',
    spot_size=spot_size,
    size=1.6,
    img=None,
    palette=tme_colormaps.cluster_cmap,
)

# Fig 2A (left): Mac score

In [None]:
# Publication version of the macrophage-score map.
sc.pl.spatial(
    ad_mac,
    color='mac_score',
    spot_size=spot_size,
    size=spatial_dot_size,
    img=None,
    cmap=tme_colormaps.score_mac,
)

# Keap KO vs. WT scoring

In [None]:
# Load the KO-over-WT gene lists (down- and up-regulated) from the shared
# database folder; `numof_genes` selects which pair of files is read.
database_path = pathlib.Path('../../shared/databases/')
numof_genes = 150

_down_csv = database_path / '{}_KO--over--WT_Down-Regulated.csv'.format(numof_genes)
_up_csv = database_path / '{}_KO--over--WT_Up-Regulated.csv'.format(numof_genes)

# Only the 'gene_name' column of each table is used.
KOvsWT_down_genelist = pd.read_csv(_down_csv, header=0)['gene_name']
KOvsWT_up_genelist = pd.read_csv(_up_csv, header=0)['gene_name']

In [None]:
# Score the macrophage subset on the down-regulated gene list.
sc.tl.score_genes(
    ad_mac,
    gene_list=KOvsWT_down_genelist,
    score_name='KOvsWT_down_score',
)

In [None]:
# Score the macrophage subset on the up-regulated gene list.
sc.tl.score_genes(
    ad_mac,
    gene_list=KOvsWT_up_genelist,
    score_name='KOvsWT_up_score',
)

In [None]:
# Combined score: up-regulated score minus down-regulated score.
ad_mac.obs['KOvsWT_up_minus_down_score'] = (
    ad_mac.obs['KOvsWT_up_score'] - ad_mac.obs['KOvsWT_down_score']
)

# Symmetric colour limits at the 99th percentile of the absolute score, so the
# diverging colormap is centred on zero.
vmax = ad_mac.obs['KOvsWT_up_minus_down_score'].abs().quantile(q=0.99)
vmin = -vmax

# NOTE(review): the save name reads 'KOvsWTK...' and, unlike the other saved
# figures, has no leading underscore — confirm the intended file name.
sc.pl.spatial(
    ad_mac[ad_mac.obs.mac_score > 0],
    color='KOvsWT_up_minus_down_score',
    sort_order=False,
    img_key=None,
    spot_size=spot_size,
    size=spatial_dot_size,
    alpha=spatial_transparency,
    vmax=vmax,
    vmin=vmin,
    color_map=tme_colormaps.diverging_score,
    save='KOvsWTK_up_minus_down_score.png',
)

In [None]:
# "Normalized" here means mean-centred: the subset-wide mean of the combined
# score is subtracted from every value.
_combined_score = ad_mac.obs['KOvsWT_up_minus_down_score']
ad_mac.obs['KOvsWT_up_minus_down_score_normalized'] = _combined_score - _combined_score.mean()

In [None]:
# Label every observation by Cxcl9 / Spp1 co-expression
# (a value > 0 counts as expressed).
conditions = [
    (df['Cxcl9'] > 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] > 0) & (df['Spp1'] == 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] == 0),
]

# Labels, in the same order as `conditions`.
results = ['Cxcl9 & Spp1', 'Cxcl9', 'Spp1', 'nothing']

# Pass an explicit string default: np.select otherwise falls back to the
# integer 0, which newer NumPy releases refuse to combine with string choices
# (TypeError) and older ones turn into a stray '0' label. Rows matching none of
# the conditions (e.g. NaN or negative values) are labelled 'nothing'.
ad_mac.obs['Cxcl9_Spp1'] = np.select(conditions, results, default='nothing')

# Colour per label.
# NOTE(review): the plotting cells visible here pass
# tme_colormaps.set_of_three_plus_bg rather than this dictionary.
Cxcl9_Spp1_colors = {
    'Cxcl9': 'Yellow',
    'Spp1': 'Cyan',
    'Cxcl9 & Spp1': 'Magenta',
    'nothing': 'lightgrey',
}

In [None]:
# Spatial map of the Cxcl9 / Spp1 co-expression labels, saved as PNG.
sc.pl.spatial(
    ad_mac,
    color='Cxcl9_Spp1',
    img_key=None,
    size=spatial_dot_size,
    spot_size=spot_size,
    alpha=0.7,
    palette=tme_colormaps.set_of_three_plus_bg,
    save='_Cxcl9_Spp1_coexpression.png',
)

In [None]:
# Label every observation by Cd74 / Spp1 co-expression
# (a value > 0 counts as expressed).
conditions = [
    (df['Cd74'] > 0) & (df['Spp1'] > 0),
    (df['Cd74'] > 0) & (df['Spp1'] == 0),
    (df['Cd74'] == 0) & (df['Spp1'] > 0),
    (df['Cd74'] == 0) & (df['Spp1'] == 0),
]

# Labels, in the same order as `conditions`.
results = ['Cd74 & Spp1', 'Cd74', 'Spp1', 'nothing']

# Pass an explicit string default: np.select otherwise falls back to the
# integer 0, which newer NumPy releases refuse to combine with string choices
# (TypeError) and older ones turn into a stray '0' label. Rows matching none of
# the conditions (e.g. NaN or negative values) are labelled 'nothing'.
ad_mac.obs['Cd74_Spp1'] = np.select(conditions, results, default='nothing')

# Colour per label.
# NOTE(review): the plotting cells visible here pass
# tme_colormaps.set_of_three_plus_bg rather than this dictionary.
Cd74_Spp1_colors = {
    'Cd74': 'Yellow',
    'Spp1': 'Cyan',
    'Cd74 & Spp1': 'Magenta',
    'nothing': 'lightgrey',
}

In [None]:
# Spatial map of the Cd74 / Spp1 co-expression labels, saved as PNG.
sc.pl.spatial(
    ad_mac,
    color='Cd74_Spp1',
    img_key=None,
    size=spatial_dot_size,
    spot_size=spot_size,
    alpha=0.7,
    palette=tme_colormaps.set_of_three_plus_bg,
    save='_Cd74_Spp1_coexpression.png',
)

In [None]:
# Columns collected for the per-nucleus export table.
obs_keys_to_extract = [
    'leiden_0.5',
    'Cxcl9_Spp1',
    'Cd74_Spp1',
    'mac_score',
    'KOvsWT_up_minus_down_score',
]
genes_to_extract = [
    'Cxcl9',
    'Cd74',
    'Spp1',
]
# (obsm key, column index) pairs: both columns of the spatial and UMAP coordinates.
coords_to_extract = [
    (obsm_key, column)
    for obsm_key in ('X_spatial', 'X_umap')
    for column in (0, 1)
]

In [None]:
# Assemble the export table: obs annotations and gene expression as columns,
# plus the selected coordinate columns from obsm.
_export_keys = obs_keys_to_extract + genes_to_extract
df_export_for_plotting = sc.get.obs_df(
    ad_mac,
    keys=_export_keys,
    obsm_keys=coords_to_extract,
)

In [None]:
# Display the export table before it is written to disk.
df_export_for_plotting

In [None]:
# Write the per-nucleus export table to the results folder; the file name
# carries the sample identifier.
_export_csv = path_to_results / 'data_export_single_nuclei_{}.csv'.format(sample_name)
df_export_for_plotting.to_csv(_export_csv)