In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from matplotlib import pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
from micron2.spatial import celltype_distances

from statannot import add_stat_annotation
from itertools import combinations

from matplotlib import rcParams

In [None]:
# Region numbers excluded from the analysis, keyed by the TMA they belong to.
_excluded_region_numbers = {
    'TMA1': (1, 11, 21, 22, 23, 24, 35),
    'TMA2': (2, 20, 26, 34, 21, 22),
    'TMA3': (1, 2, 7, 21, 23, 25, 33, 22, 16),
}
# Expand each TMA's numbers into '<TMA>_reg<N>' names, one list per TMA.
toss_regions_1, toss_regions_2, toss_regions_3 = (
    [f'{tma}_reg{num}' for num in nums]
    for tma, nums in _excluded_region_numbers.items()
)
# Flat list of every excluded region name, consulted by maybe_keep().
toss_regions = [*toss_regions_1, *toss_regions_2, *toss_regions_3]
def maybe_keep(p):
    """Decide whether the file at path ``p`` should be kept.

    Returns False when ``p`` contains ``'<region>.h5ad'`` for any region name
    in the module-level ``toss_regions`` list, and True otherwise.
    """
    return not any(region + '.h5ad' in p for region in toss_regions)

# List every preprocessed Bladder .h5ad file, then drop the excluded regions.
# glob is used instead of capturing `!ls` output: when the pattern matches
# nothing, glob returns an empty list, whereas captured shell output can contain
# ls's error text, which would then be handed to the reader as if it were a path.
# sorted() keeps a deterministic order (plain string order).
from glob import glob

adata_paths = sorted(glob('/storage/codex/preprocessed_data/*Bladder*/*.h5ad'))
adata_paths = [p for p in adata_paths if maybe_keep(p)]
adata_paths

In [None]:
# Cell type labels used as both rows and columns of the per-sample distance
# matrices; they are compared against the values of `ad.obs.subtype_rescued`.
u_cells = [
    'Bcell',
    'CD4T', 'CD4T_Treg',
    'CD8T', 'CD8T_Trm', 'CD8T_activated', 'CD8T_exhausted',
    'Endothelial',
    'Epithelial', 'Epithelial_CDH', 'Epithelial_KRT',
    'Mac',
    'Stromal',
]

In [None]:
# Print the built-in help (signature and docstring) for celltype_distances,
# as a reference for the arguments used in the distance computation.
help(celltype_distances)

In [None]:
import os
from itertools import product as it_prod
import pandas as pd
import tqdm.auto as tqdm

# Per-sample distance matrices. Keys are sample names (the file basename
# without '.h5ad'); values are DataFrames with u_cells on both axes, indexed
# as [c1, c2]. Entries stay NaN on the diagonal and for any pair where either
# cell type has fewer than `min_cells` cells in that sample.
cellular_distances = {}
# Minimum number of cells of a type required in a sample for it to be compared.
min_cells = 50

with tqdm.tqdm(adata_paths) as pbar:
    for pth in pbar:
        sample = os.path.basename(pth).replace('.h5ad','')
        ad = sc.read_h5ad(pth)
        pbar.set_description(f'{sample}: {ad.shape}')
        coords = ad.obsm['coordinates'].astype(np.float32)
        celltypes = np.array(ad.obs.subtype_rescued)

        # Count each cell type once per sample. The counts do not depend on the
        # pair being compared, so recomputing them inside the pair loop would
        # rescan the label array for every one of the len(u_cells)**2 pairs.
        n_cells = {c: int(np.sum(celltypes == c)) for c in u_cells}

        dists = pd.DataFrame(index=u_cells, columns=u_cells, dtype=np.float32)
        for c1, c2 in it_prod(u_cells, u_cells):
            if c1 == c2:
                continue
            # Skip pairs where either type is too rare in this sample.
            if n_cells[c1] < min_cells or n_cells[c2] < min_cells:
                continue
            # NOTE(review): np.median is passed as summary_fn and then applied
            # again to the returned value. Whether that makes this a median of
            # per-cell medians depends on what celltype_distances returns;
            # confirm against its documentation.
            d = celltype_distances(coords, celltypes, c1, c2, k=10, mode='nearest',
                                   summary_fn=np.median)
            dists.loc[c1, c2] = np.median(d)

        cellular_distances[sample] = dists.copy()

In [None]:

# Build a long-format table with one row per (sample, query cell type): the
# log10 of the stored distance between the query type and the target type `t`.
# Samples whose matrix holds NaN for that pair are skipped.
ds = []
samples = []
queries = []
targets = []

t = 'CD8T_Trm'
qs = ['Epithelial_KRT', 'Epithelial', 'Epithelial_CDH']
for q in qs:
    for k, v in cellular_distances.items():
        z = v.loc[q, t]
        if np.isnan(z):
            continue
        ds.append(np.log10(z))
        samples.append(k)
        queries.append(q)
        targets.append(t)


data = {'Distances': ds,
        'Samples': samples,
        'Query_cell': queries,
        'Target_cell': targets,}
distances = pd.DataFrame( data )

rcParams['figure.facecolor'] = (1,1,1,1)
plt.figure(figsize=(3,3), dpi=90)
ax = plt.gca()
sns.boxplot(data=distances, x='Query_cell', y='Distances', ax=ax, color='w')
sns.stripplot(data=distances, x='Query_cell', y='Distances', color='k', ax=ax)

# Only compare query types that actually have rows in `distances`: a pair that
# names a group missing from the plotted data is rejected by statannot.
# combinations() never yields a pair of identical items, so no extra filter
# is needed.
qs_present = [q for q in qs if q in set(queries)]
box_pairs = list(combinations(qs_present, 2))
if box_pairs:
    _ = add_stat_annotation(data=distances, x='Query_cell', y='Distances', ax=ax,
                            box_pairs=box_pairs,
                            text_format='simple',
                            test='Mann-Whitney',
                            comparisons_correction=None
                           )

ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
# The plotted values are log10-transformed, so say so on the axis instead of
# showing the bare column name.
ax.set_ylabel('log10(distance)')
plt.title(f'Median distance to nearest {t} per biopsy')
# plt.title(f'Mean distance to nearby {t} per biopsy')
