In [None]:
import pandas as pd
import numpy as np
import scipy
import scanpy as sc
from numba import njit
import matplotlib
import matplotlib.pyplot as plt
import re
import statsmodels.stats.multitest
import os
import csv
import tacco as tc

In [None]:
import sys

# Make the project's `helper` module importable. The notebook expects to be
# executed either in the sub-workflow directory or in the notebooks directory,
# so both candidate locations are put on the module search path temporarily.
# NOTE(review): both inserts target position 1, so '../workflow/' (inserted
# last) is searched before '../' - confirm this is the intended preference.
sys.path.insert(1, '../')
sys.path.insert(1, '../workflow/')
import helper
# Remove the two temporary entries (positions 1 and 2) again.
del sys.path[1:3]

# Path lookup bound to the 'mouse_cartana' configuration of the helper module.
get_path = helper.get_paths('mouse_cartana')

In [None]:
# Baseline figure resolution; hard-coded rather than read from the current rcParams.
default_dpi = 72.0 # matplotlib.rcParams['figure.dpi']
# Later cells raise the dpi to a multiple of `default_dpi` for dense scatter plots and reset it to 1x afterwards.
matplotlib.rcParams['figure.dpi'] = 1 * default_dpi

# settings

## visualization settings

In [None]:
# Colour lookups for the annotation families used in the plots, provided by the project helper module.
# `labels_colors` is later used as a mapping (its keys select and order the label columns).
compartment_colors = helper.get_colors('compartment')
labels_colors = helper.get_colors('labels')
cluster_colors = helper.get_colors('cluster')
program_colors = helper.get_colors('program')
region_colors = helper.get_colors('region')
mostly_gray_region_colors = helper.get_colors('mostly_gray_region')

In [None]:
# Directory into which every figure of this notebook is saved.
figures_folder = get_path('plots')
# Default marker size for puck scatter plots (not referenced by the cells shown in this notebook section).
puck_point_size = 3

## analysis settings

In [None]:
# Keyword bundle describing how enrichments are to be computed (sum reduction, clr normalization, Welch test).
enrichment_method = {'reduction':'sum','normalization':'clr','method':'welch','assume_counts':True,}
# Name of the matching result column; evaluates to 'p_welch_fdr_bh' for the settings above.
p_key = f'p_{enrichment_method["method"]}_fdr_bh'

# Load mouse data

In [None]:
# Load the scRNA-seq reference and the Slide-seq data from the project's resource folders.
reference = sc.read(f'{get_path("resources","mouse_sc")}/scRNAseq.h5ad')
slideseq = sc.read(f'{get_path("resources","mouse_slideseq")}/slideseq.h5ad')
# Keep only Slide-seq observations whose row sum of X is at least 100; .copy() turns the view into an independent object.
slideseq = slideseq[slideseq.X.sum(axis=1)>=100].copy()

In [None]:
# Cartana table; columns used later in this notebook include SampleID, gene, x, y, cluster and TLS.
# low_memory=False makes pandas infer each column's dtype from the whole file instead of chunk by chunk.
cartana = pd.read_csv(f'{get_path("resources")}/mouse_cartana.csv.gz',low_memory=False)

# Organize metadata

In [None]:
# Distinct sample identifiers of the Cartana table, in order of first appearance.
samples = cartana['SampleID'].unique()

In [None]:
# Parse the sample names: the leading letters become 'State', the digit following '_C' becomes 'version'.
sample_meta = pd.Index(samples).str.extract(r'(?P<State>[a-zA-Z]*)[0-9]*[a-z]?_C(?P<version>[0-9])').set_index(samples)
# Translate the name prefixes into state names; any prefix other than 'cont'/'AV' becomes NaN.
sample_meta['State'] = sample_meta['State'].map({'cont': 'normal', 'AV': 'premalignant'})
# Turn the version digit into a tag such as 'v1' or 'v2'.
sample_meta['version'] = 'v' + sample_meta['version']

In [None]:
# Per-sample coordinate shifts that are added to the x/y columns of the
# Cartana table further below; samples that are not listed keep a shift of 0.
# presumably chosen so that samples do not overlap in a shared frame - TODO confirm
sample_shifts = {
    'cont13_C1': {'x': 5000.0},
    'cont14_C1': {'x': 2000.0, 'y': 3000.0},

    'cont13_C2': {'x': 8000.0},
    'cont14a_C2': {'x': 1000.0, 'y': 3000.0},
    'cont14b_C2': {'x': 5000.0, 'y': 4500.0},

    'AV13_C1': {'x': 5000.0, 'y': 1500.0},
    'AV14_C1': {'x': 8000.0, 'y': 4000.0},

    'AV13_C2': {'x': 8000.0, 'y': 1000.0},
    'AV14_C2': {'x': 6500.0, 'y': 8000.0},
}

offsets = pd.DataFrame(index=samples)
for axis in ('x', 'y'):
    offsets[axis] = 0.0

# Same element-wise assignments as a hand-written list, in the same order.
for sample_id, shift in sample_shifts.items():
    for axis, value in shift.items():
        offsets.loc[sample_id, axis] = value

In [None]:
# Attach the per-sample state and panel version to every row and store the
# low-cardinality columns as categoricals.
cartana['State'] = cartana['SampleID'].map(sample_meta['State']).astype('category')
cartana['version'] = cartana['SampleID'].map(sample_meta['version']).astype('category')
cartana['SampleID'] = cartana['SampleID'].astype('category')
cartana.index = pd.RangeIndex(len(cartana))
# Shift the coordinates by the per-sample offsets. Mapping a categorical column
# returns a categorical whenever the mapped values are all distinct, and
# categoricals cannot be added to a numeric column; the explicit float cast
# keeps the shift working for any offset table, not only one with repeated values.
# NOTE: the shift is applied in place, so re-running this cell without
# reloading `cartana` applies the offsets a second time.
cartana['x'] += cartana['SampleID'].map(offsets['x']).astype(float)
cartana['y'] += cartana['SampleID'].map(offsets['y']).astype(float)

In [None]:
# Genes measured by each panel version (computed before rows without a
# cluster annotation are dropped).
genesV1 = pd.Index(cartana.query('version=="v1"')['gene'].unique())
genesV2 = pd.Index(cartana.query('version=="v2"')['gene'].unique())
# Version-specific genes. These used to be bare expressions whose values were
# discarded (only the last expression of a cell is displayed); bind them to
# names so they can be inspected.
genesOnlyV1 = genesV1[~genesV1.isin(genesV2)]
genesOnlyV2 = genesV2[~genesV2.isin(genesV1)]
# Genes present in both panel versions.
genesCommon = genesV2.intersection(genesV1)

In [None]:
# Drop rows without a cluster annotation; .copy() detaches the result from the unfiltered table.
cartana = cartana.loc[~cartana['cluster'].isna()].copy()

In [None]:
# Store the TLS column as integers and derive a two-level flag from it:
# rows with TLS > 0 are tagged 'TLS', all others 'other'.
# assumes TLS == 0 encodes "not part of any TLS" - TODO confirm
cartana['TLS'] = cartana['TLS'].astype(int)
cartana['anyTLS'] = (cartana['TLS'] > 0).map({True:'TLS',False:'other'})

In [None]:
# Add 'labels' and 'cluster_short' columns derived from 'cluster', using the
# (labels, cluster) and (cluster_short, cluster) pairings found in the reference annotation.
# NOTE(review): the exact interpretation of `mapping` is defined by tc.utils.merge_annotation - confirm there.
tc.utils.merge_annotation(cartana, 'cluster', mapping=reference.obs[['labels','cluster',]].drop_duplicates().set_index('labels')['cluster'], result_key='labels');
tc.utils.merge_annotation(cartana, 'cluster', mapping=reference.obs[['cluster_short','cluster',]].drop_duplicates().set_index('cluster_short')['cluster'], result_key='cluster_short');

In [None]:
# One sub-table per (state, version) combination, keyed '<state>_<version>'.
coords_state_version = {
    f'{state}_{version}': cartana.query(f'State=="{state}" & version=="{version}"')
    for state in ('normal', 'premalignant')
    for version in ('v1', 'v2')
}

# compositions on binned data

In [None]:
# Per-sample AnnData objects built from spatially binned molecules.
bindatas = {}

# Bin size handed to tc.utils.bin (assumed to be in the units of the x/y columns - TODO confirm).
scale=10
# observed=True: only iterate over samples that actually occur. Grouping by a
# categorical key otherwise also yields empty groups for unused categories
# (and triggers a FutureWarning about the changing default on recent pandas).
for SampleID,sub_df in cartana.groupby('SampleID', observed=True):
    # Exclude rows whose cluster is the literal string "NAN".
    sub_cartana = sub_df.query('cluster!="NAN"').copy()
    # Re-seed for every sample so that each sample is processed reproducibly
    # (assumes the tacco helpers draw from NumPy's global RNG - TODO confirm).
    np.random.seed(42)
    # Write bin coordinates to 'bx'/'by' and combine them into one bin id 'b'.
    tc.utils.bin(sub_cartana,bin_size=scale,bin_keys=['bx','by'])
    tc.utils.hash(sub_cartana,keys=['bx','by'],hash_key='b')

    # One observation per bin 'b', one variable per gene; 'cluster' is passed as a
    # compositional key and x/y as mean keys (per-bin cluster composition and
    # mean position, going by the argument names).
    bindatas[SampleID] = tc.utils.dataframe2anndata(sub_cartana, 'b', 'gene', count_key=None, compositional_keys=['cluster'], mean_keys=['x','y'])


In [None]:
# Stack the per-sample objects: the dictionary key is recorded in obs['SampleID'],
# the union of all variables is kept (join='outer'), and observation names are
# made unique by appending '-' plus the key.
bindata = sc.concat(bindatas, label='SampleID', join='outer', index_unique='-')

In [None]:
# Replace missing cluster entries by 0.0 (presumably introduced by the outer join for clusters absent from a sample).
bindata.obsm['cluster'] = bindata.obsm['cluster'].fillna(0.0)

In [None]:
# Propagate the per-sample state and panel version to every bin.
# NOTE(review): 'State' is stored as a categorical, 'version' is left as returned by .map - confirm the difference is intended.
bindata.obs['State'] = bindata.obs['SampleID'].map(sample_meta['State']).astype('category')
bindata.obs['version'] = bindata.obs['SampleID'].map(sample_meta['version'])

In [None]:
# Derive 'cluster_short' and 'labels' annotations for the bins from their 'cluster'
# annotation, with the same reference-based pairings used for the molecule table.
tc.utils.merge_annotation(bindata, 'cluster', mapping=reference.obs[['cluster_short','cluster',]].drop_duplicates().set_index('cluster_short')['cluster'], result_key='cluster_short')
tc.utils.merge_annotation(bindata, 'cluster', mapping=reference.obs[['labels','cluster',]].drop_duplicates().set_index('labels')['cluster'], result_key='labels')

In [None]:
# Select and order the label columns by the keys of the label colour table (raises KeyError if a coloured label is missing).
bindata.obsm['labels'] = bindata.obsm['labels'][list(labels_colors.keys())]

In [None]:
# Filter bins with min_counts_per_cell=3 (presumably drops bins with fewer than 3 counts);
# return_view=False presumably requests an independent object rather than a view - TODO confirm in tacco docs.
bindata = tc.pp.filter(bindata, min_counts_per_cell=3, return_view=False)

In [None]:
# Two panels side by side, one per Cartana panel version.
fig,axs = tc.pl.subplots(2, axsize=(6,3))
# AnnData objects have no DataFrame-style .query(); select the bins of each
# version with a boolean mask on .obs, as the other cells of this notebook do.
tc.pl.contribution(bindata[bindata.obs['version'] == 'v1'], 'labels', 'State', normalization='sum', log=False, sample_key='SampleID', colors=labels_colors, ax=axs[0,0]); axs[0,0].set_title('Cartana V1');
tc.pl.contribution(bindata[bindata.obs['version'] == 'v2'], 'labels', 'State', normalization='sum', log=False, sample_key='SampleID', colors=labels_colors, ax=axs[0,1]); axs[0,1].set_title('Cartana V2');
fig.savefig(f'{figures_folder}/mouse_cartana_skyline_labels_vs_state.pdf',bbox_inches='tight')

## comparison with scRNA-seq and Slide-Seq

In [None]:
# prepare spatial sample split
# Adds a 'SampleID_split' annotation to both spatial datasets; it is used as the sample key in the comparisons below.
# presumably each sample is cut into a 2x2 grid of sub-samples separated by a buffer of thickness 400 - see tc.utils.split_spatial_samples
tc.utils.split_spatial_samples(bindata, buffer_thickness=400, split_scheme=(2,2), sample_key='SampleID', result_key='SampleID_split', check_splits=False)
tc.utils.split_spatial_samples(slideseq, buffer_thickness=400, split_scheme=(2,2), sample_key='SampleID', result_key='SampleID_split', check_splits=False)

In [None]:
# Keyword arguments shared by all tc.tl.get_contributions calls below:
# sum reduction with clr normalization, treating the values as counts.
contribution_type = {
    'reduction': 'sum',
    'normalization': 'clr',
    'assume_counts': True,
}

def make_means(contributions, state, rng=None):
    """Average the contributions of one state separately for every dataset.

    Parameters
    ----------
    contributions
        Mapping from dataset name to a DataFrame whose index holds the state
        label of every row and whose columns are the quantities to average.
    state
        Index label selecting the rows that enter the average.
    rng
        Optional random state forwarded to `DataFrame.sample`. When given, the
        selected rows are resampled with replacement (one bootstrap draw)
        before averaging.

    Returns
    -------
    A DataFrame with one column per dataset and one row per contribution column.
    """
    per_dataset = {}
    for name, frame in contributions.items():
        # The list selector keeps a DataFrame even if only a single row matches.
        selected = frame.loc[[state]]
        if rng is not None:
            selected = selected.sample(frac=1, replace=True, random_state=rng)
        per_dataset[name] = {
            column: np.mean(selected[column].to_numpy())
            for column in selected.columns
        }
    return pd.DataFrame(per_dataset)

def make_delta_means(contributions, rng=None):
    """Mean premalignant-minus-normal difference of the contributions per dataset.

    The mean over all pairwise differences between premalignant and normal
    rows equals the difference of the two group means, so the group means are
    subtracted directly instead of materializing the full n x m matrix of
    pairwise differences.

    Parameters
    ----------
    contributions
        Mapping from dataset name to a DataFrame whose index holds the state
        label ('normal' / 'premalignant') of every row and whose columns are
        the quantities to compare.
    rng
        Optional random state forwarded to `DataFrame.sample`. When given,
        both groups are resampled with replacement (one bootstrap draw).

    Returns
    -------
    A DataFrame with one column per dataset and one row per contribution column.

    Raises
    ------
    KeyError
        If a dataset has no rows labelled 'normal' or 'premalignant'.
    """
    delta_means = {}
    for ds,conts in contributions.items():
        conts_normal = conts.loc[['normal']]
        conts_premalignant = conts.loc[['premalignant']]
        if rng is not None:
            # Draw order (normal first, then premalignant) determines the
            # random stream and must stay as it is for reproducible results.
            conts_normal = conts_normal.sample(frac=1,replace=True,random_state=rng)
            conts_premalignant = conts_premalignant.sample(frac=1,replace=True,random_state=rng)
        delta_means[ds] = {
            col: np.mean(conts_premalignant[col].to_numpy()) - np.mean(conts_normal[col].to_numpy())
            for col in conts.columns
        }
    return pd.DataFrame(delta_means)

def plot_means_and_errors(mean_means, std_means, title, file_title, colors=None, markers=None):
    """Plot every pair of datasets against each other with error bars and save the figure.

    Parameters
    ----------
    mean_means
        DataFrame of central values; rows are items, columns are datasets.
    std_means
        DataFrame of the same layout holding the error-bar half-widths.
    title
        Figure title.
    file_title
        Suffix of the output file name.
    colors
        Optional mapping item -> colour. Items without an entry are drawn in
        light gray.
    markers
        Optional mapping item -> marker. Items without an entry get no marker.

    If `colors` or `markers` is omitted, the missing one is generated for the
    8 items with the highest maximum rank across datasets ('^') and the 8 items
    with the lowest minimum rank ('v'); an item in both sets gets the
    bottom-set styling.

    The figure is written to
    `{figures_folder}/mouse_cartana_scrna_slideseq_correspondence_{file_title}.pdf`,
    where `figures_folder` is a notebook-level setting.
    """
    import matplotlib.lines as mlines

    # Derive the number of datasets from the data itself. This used to be read
    # from a notebook-level `n_ds` variable that is only assigned in a later
    # cell, so the function failed (or silently used a stale value) unless
    # that cell had been run first.
    n_ds = len(mean_means.columns)

    if colors is None or markers is None:
        top_items    = mean_means.rank().max(axis=1).sort_values()[-8:][::-1]
        bottom_items = mean_means.rank().min(axis=1).sort_values()[:8]
        colors_8 = np.array(tc.pl.get_default_colors(10))[[0,1,2,3,4,5,6,9]]
        top_items_colors = {k:c for k,c in zip(top_items.index,colors_8)}
        bottom_items_colors = {k:c for k,c in zip(bottom_items.index,colors_8)}
        top_items_markers = {k:'^' for k in top_items.index}
        bottom_items_markers = {k:'v' for k in bottom_items.index}
        if colors is None:
            colors = {**top_items_colors,**bottom_items_colors}
        if markers is None:
            markers = {**top_items_markers,**bottom_items_markers}

    # Legend entries: everything with a colour, followed by items that only have a marker.
    items_for_legend = [k for k in colors.keys()]
    for k in markers.keys():
        if k not in items_for_legend:
            items_for_legend.append(k)
    # Draw unstyled items first so that the highlighted ones end up on top.
    item_plot_order = [*[k for k in mean_means.index if k not in items_for_legend],*items_for_legend]

    fig,axs = tc.pl.subplots(n_ds-1,n_ds-1,axsize=(2,2),sharex='col',sharey='row')
    fig.suptitle(title, y=1.1)
    def get_marker(item):
        return markers[item] if item in markers else ''
    def get_color(item):
        return colors[item] if item in colors else 'lightgray'
    legend_handles = []
    # Lower-triangular layout: dataset i (all but the first) on the y axis
    # against dataset j (all but the last) on the x axis, for i > j.
    for i_ds,ds_i in enumerate(mean_means.columns):
        for j_ds,ds_j in enumerate(mean_means.columns):
            if i_ds == 0 or j_ds == n_ds - 1:
                continue
            ax = axs[i_ds-1,j_ds]
            if i_ds <= j_ds:
                # Unused panel on or above the diagonal.
                ax.axis('off')
                continue
            # Axis labels only on the bottom row and in the left column.
            if i_ds == n_ds - 1:
                ax.set_xlabel(ds_j)
            if j_ds == 0:
                ax.set_ylabel(ds_i)
            ax.axhline(color='gray', linewidth=1)
            ax.axvline(color='gray', linewidth=1)
            for item in item_plot_order:
                x = mean_means.loc[item,ds_j]
                y = mean_means.loc[item,ds_i]
                x_err = std_means.loc[item,ds_j]
                y_err = std_means.loc[item,ds_i]
                color = get_color(item)
                marker = get_marker(item)
                ax.errorbar(x, y, xerr=x_err, yerr=y_err, capsize=3, elinewidth=1, markeredgewidth=1, color=color, marker=marker)
            # Only the correlation coefficient is shown; the p-value is not used.
            r,_ = scipy.stats.pearsonr(mean_means.loc[:,ds_i],mean_means.loc[:,ds_j])
            ax.text(0.05, 0.95, f'r={r:.2f}', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes)
    for item in items_for_legend:
        color = get_color(item)
        marker = get_marker(item)
        legend_handles.append(mlines.Line2D([], [], color=color, marker=marker, linestyle='None', markersize=5, label=item))
    # One legend column per started block of 10 entries, anchored to the first panel.
    axs[0,0].legend(handles=legend_handles, bbox_to_anchor=(1, 1), loc='upper left', ncol=len(items_for_legend)//10+1)
    fig.savefig(f'{figures_folder}/mouse_cartana_scrna_slideseq_correspondence_{file_title}.pdf',bbox_inches='tight')


In [None]:
# Per-sample, per-state label contributions for each dataset, keyed by dataset name.
# NOTE(review): the following cell starts with a verbatim copy of these statements
# and recomputes `contributions`; this cell looks redundant - confirm and remove one copy.
contributions = {}
# scRNA-seq reference restricted to the two states under comparison; samples are the original SampleIDs.
sub_data = reference[reference.obs['State'].isin(['normal','premalignant'])]
contributions['scRNA-seq'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID',**contribution_type,reads=False,)
# The spatial datasets use the spatially split samples as sample key.
sub_data = slideseq
contributions['Slide-seq'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v2']
contributions['Cartana V2'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v1']
contributions['Cartana V1'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)


In [None]:
# Label contributions per dataset (scRNA-seq, Slide-seq, Cartana V2, Cartana V1), keyed by dataset name.
contributions = {}
sub_data = reference[reference.obs['State'].isin(['normal','premalignant'])]
contributions['scRNA-seq'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID',**contribution_type,reads=False,)
sub_data = slideseq
contributions['Slide-seq'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v2']
contributions['Cartana V2'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v1']
contributions['Cartana V1'] = tc.tl.get_contributions(sub_data,'labels','State',sample_key='SampleID_split',**contribution_type,reads=False,)

# Normal samples: the un-resampled means provide the index/columns; 100 bootstrap
# draws (fresh generator, seed 42) provide the plotted mean and its spread.
normal_means = make_means(contributions, 'normal', rng=None)
rng = np.random.default_rng(42)
boot_normal_means = [ make_means(contributions, 'normal', rng=rng) for i_boot in range(100) ]
mean_boot_normal_means = pd.DataFrame(np.mean(boot_normal_means,axis=0), index=normal_means.index, columns=normal_means.columns)
std_boot_normal_means = pd.DataFrame(np.std(boot_normal_means,axis=0), index=normal_means.index, columns=normal_means.columns)

# Premalignant samples: same procedure, generator re-created with the same seed.
premalignant_means = make_means(contributions, 'premalignant', rng=None)
rng = np.random.default_rng(42)
boot_premalignant_means = [ make_means(contributions, 'premalignant', rng=rng) for i_boot in range(100) ]
mean_boot_premalignant_means = pd.DataFrame(np.mean(boot_premalignant_means,axis=0), index=premalignant_means.index, columns=premalignant_means.columns)
std_boot_premalignant_means = pd.DataFrame(np.std(boot_premalignant_means,axis=0), index=premalignant_means.index, columns=premalignant_means.columns)

# Premalignant-minus-normal differences: same procedure.
delta_means = make_delta_means(contributions, rng=None)
rng = np.random.default_rng(42)
boot_delta_means = [ make_delta_means(contributions, rng=rng) for i_boot in range(100) ]
mean_boot_delta_means = pd.DataFrame(np.mean(boot_delta_means,axis=0), index=delta_means.index, columns=delta_means.columns)
std_boot_delta_means = pd.DataFrame(np.std(boot_delta_means,axis=0), index=delta_means.index, columns=delta_means.columns)

# Number of datasets being compared.
n_ds = len(mean_boot_normal_means.columns)

# Every label gets a round marker and its colour from the label colour table.
labels_markers = { ct:'o' for ct in mean_boot_normal_means.index}
plot_means_and_errors(mean_boot_normal_means, std_boot_normal_means, title='clr-transformed cell type fractions in normal samples\n(with bootstrapped standard error of the mean)', file_title='normal_labels', colors=labels_colors, markers=labels_markers)
plot_means_and_errors(mean_boot_premalignant_means, std_boot_premalignant_means, title='clr-transformed cell type fractions in premalignant samples\n(with bootstrapped standard error of the mean)', file_title='premalignant_labels', colors=labels_colors, markers=labels_markers)
plot_means_and_errors(mean_boot_delta_means, std_boot_delta_means, title='difference of clr-transformed cell type fractions in premalignant and normal samples\n(with bootstrapped standard error of the mean)', file_title='delta_labels', colors=labels_colors, markers=labels_markers)

In [None]:
# Genes shared by both Cartana panels, the scRNA-seq reference and the Slide-seq data.
overall_genesCommon = genesCommon.intersection(reference.var.index).intersection(slideseq.var.index)
# Gene-level contributions per dataset: no annotation key (None), values taken from X, restricted to the shared genes.
contributions = {}
sub_data = reference[reference.obs['State'].isin(['normal','premalignant']),overall_genesCommon]
contributions['scRNA-seq'] = tc.tl.get_contributions(sub_data,None,'State',value_location='X',sample_key='SampleID',**contribution_type,reads=False,)
sub_data = slideseq[:,overall_genesCommon]
contributions['Slide-seq'] = tc.tl.get_contributions(sub_data,None,'State',value_location='X',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v2',overall_genesCommon]
contributions['Cartana V2'] = tc.tl.get_contributions(sub_data,None,'State',value_location='X',sample_key='SampleID_split',**contribution_type,reads=False,)
sub_data = bindata[bindata.obs['version'] == 'v1',overall_genesCommon]
contributions['Cartana V1'] = tc.tl.get_contributions(sub_data,None,'State',value_location='X',sample_key='SampleID_split',**contribution_type,reads=False,)

# Normal samples: un-resampled means for index/columns, 100 bootstrap draws (seed 42) for mean and spread.
normal_means = make_means(contributions, 'normal', rng=None)
rng = np.random.default_rng(42)
boot_normal_means = [ make_means(contributions, 'normal', rng=rng) for i_boot in range(100) ]
mean_boot_normal_means = pd.DataFrame(np.mean(boot_normal_means,axis=0), index=normal_means.index, columns=normal_means.columns)
std_boot_normal_means = pd.DataFrame(np.std(boot_normal_means,axis=0), index=normal_means.index, columns=normal_means.columns)

# Premalignant samples: same procedure, generator re-created with the same seed.
premalignant_means = make_means(contributions, 'premalignant', rng=None)
rng = np.random.default_rng(42)
boot_premalignant_means = [ make_means(contributions, 'premalignant', rng=rng) for i_boot in range(100) ]
mean_boot_premalignant_means = pd.DataFrame(np.mean(boot_premalignant_means,axis=0), index=premalignant_means.index, columns=premalignant_means.columns)
std_boot_premalignant_means = pd.DataFrame(np.std(boot_premalignant_means,axis=0), index=premalignant_means.index, columns=premalignant_means.columns)

# Premalignant-minus-normal differences: same procedure.
delta_means = make_delta_means(contributions, rng=None)
rng = np.random.default_rng(42)
boot_delta_means = [ make_delta_means(contributions, rng=rng) for i_boot in range(100) ]
mean_boot_delta_means = pd.DataFrame(np.mean(boot_delta_means,axis=0), index=delta_means.index, columns=delta_means.columns)
std_boot_delta_means = pd.DataFrame(np.std(boot_delta_means,axis=0), index=delta_means.index, columns=delta_means.columns)

# Number of datasets being compared.
n_ds = len(mean_boot_normal_means.columns)

# No colours/markers are passed, so the plotting function highlights the top and bottom ranked genes itself.
plot_means_and_errors(mean_boot_normal_means, std_boot_normal_means, title='clr-transformed gene fractions in normal samples\n(with bootstrapped standard error of the mean)', file_title='normal_genes')
plot_means_and_errors(mean_boot_premalignant_means, std_boot_premalignant_means, title='clr-transformed gene fractions in premalignant samples\n(with bootstrapped standard error of the mean)', file_title='premalignant_genes')
plot_means_and_errors(mean_boot_delta_means, std_boot_delta_means, title='difference of clr-transformed gene fractions in premalignant and normal samples\n(with bootstrapped standard error of the mean)', file_title='delta_genes')

# TLS composition in cell types

### for v2 premalignant only

In [None]:
# Columns whose combination identifies one "blob" of molecules.
chosen_cats = ['version','State','anyTLS','SampleID','TLS']
# Restrict to the premalignant samples of panel version v2.
pv2_cartana = cartana.query('version=="v2" & State=="premalignant"').copy()
# Drop SampleID categories that no longer occur after subsetting.
pv2_cartana['SampleID'] = pv2_cartana['SampleID'].cat.remove_unused_categories()
# 'blobs' is a hash over `chosen_cats`; rows that agree in all of these columns presumably share one value - see tc.utils.hash.
pv2_cartana['blobs'] = tc.utils.hash(pv2_cartana, chosen_cats, compress=False)

In [None]:
# One observation per blob, one variable per gene; the chosen categories, 'labels' and 'gene' are passed as compositional keys.
blob_data = tc.utils.dataframe2anndata(pv2_cartana, obs_key='blobs', var_key='gene', compositional_keys=[*chosen_cats, 'labels','gene'])
# The molecule-level subset is no longer needed; release it.
del pv2_cartana

In [None]:
# Collapse each chosen category to a single value per blob (written under the same key by
# tc.utils.get_maximum_annotation) and remove the corresponding .obsm entry.
for c in chosen_cats:
    tc.utils.get_maximum_annotation(blob_data,c, result_key=c)
    del blob_data.obsm[c]

In [None]:
# Size of a blob: the sum of its row of X.
# NOTE(review): 'TLSsize' is later formatted with '{:07d}', which requires integer values - confirm tc.sum returns integers here.
blob_data.obs['TLSsize'] = tc.sum(blob_data.X, axis=1)

In [None]:
# String renderings of the blob annotations, computed once and shared by the
# composite keys below. The size is zero-padded to 7 digits.
_state_str = blob_data.obs['State'].astype(str)
_version_str = blob_data.obs['version'].astype(str)
_any_tls_str = blob_data.obs['anyTLS'].astype(str)
_size_str = blob_data.obs['TLSsize'].map('{:07d}'.format)

# Space-separated composite keys, stored as categoricals.
blob_data.obs['cats'] = (
    _state_str + " " + _version_str + " " + _any_tls_str
).astype("category")
blob_data.obs['version_State_TLSsize'] = (
    _version_str + " " + _state_str + " " + _size_str + " " + blob_data.obs['TLS'].astype(str)
).astype("category")
blob_data.obs['version_State_TLSsize_anyTLS'] = (
    _version_str + " " + _state_str + " " + _size_str + " " + _any_tls_str
).astype("category")
blob_data.obs['TLS_label'] = (
    _any_tls_str + " " + _size_str + " " + blob_data.obs['SampleID'].astype(str)
).astype("category")

In [None]:
# Boolean flag per gene: True if its composition value exceeds 0.03 in at least one group of the TLS blobs.
top_genes = ((tc.tl.get_compositions(blob_data[blob_data.obs['anyTLS']=='TLS'], 'gene', 'version_State_TLSsize_anyTLS',) > 0.03).sum(axis=0) >= 1)
print(f"{len(top_genes[top_genes])} top_genes: {top_genes[top_genes].index.to_list()!r}")
# Build a 'top_gene' annotation in which all genes that are not flagged are passed as members of "other".
tc.utils.merge_annotation(blob_data, 'gene', mapping={"other": top_genes[~top_genes].index}, result_key='top_gene');

In [None]:
# Light gray for the pooled "other" genes, default colours for the flagged genes.
top_gene_colors={'other':'#DDD',**tc.pl.get_default_colors(top_genes[top_genes].index)}

In [None]:
# Two stacked panels: label composition (first) and top-gene composition (second) per TLS_label.
fig,axs = tc.pl.subplots(1,2, axsize=(7,3),y_padding=1.8)
tc.pl.compositions(blob_data, 'labels', 'TLS_label', ax=axs[0,0], colors=labels_colors);
tc.pl.compositions(blob_data, 'top_gene', 'TLS_label', ax=axs[1,0], colors=top_gene_colors);
fig.savefig(f'{figures_folder}/mouse_cartana_labels_genes_per_TLS_v2_premalignant.pdf',bbox_inches='tight')

## TLSs

In [None]:
# Molecules of the premalignant v2 samples, as an independent copy.
sub = cartana.query('State=="premalignant" & version=="v2"').copy()

In [None]:
# Mean position of the molecules of every TLS id.
TLS_means = sub.groupby('TLS')[['x','y']].mean()
# Skip id 0 (the value for which 'anyTLS' is 'other').
TLS_means = TLS_means[TLS_means.index != 0]
# View window of 1000 x 800 coordinate units centred on each TLS.
TLS_view_mins = TLS_means - [500,400]
TLS_view_maxs = TLS_means + [500,400]
# just show the first two:
TLS_view_mins = TLS_view_mins.iloc[:2]
TLS_view_maxs = TLS_view_maxs.iloc[:2]

In [None]:
# One copy of the molecules inside the view window of each selected TLS
# (window bounds are exclusive).
TLS_views = {}
for TLSi in TLS_view_mins.index:
    x_lo = TLS_view_mins.loc[TLSi,"x"]
    x_hi = TLS_view_maxs.loc[TLSi,"x"]
    y_lo = TLS_view_mins.loc[TLSi,"y"]
    y_hi = TLS_view_maxs.loc[TLSi,"y"]
    window = f'x>{x_lo} & x<{x_hi} & y>{y_lo} & y<{y_hi}'
    TLS_views[TLSi] = sub.query(window).copy()

In [None]:
# Render this figure at 4x the baseline dpi; reset before saving (see below).
matplotlib.rcParams['figure.dpi'] = 4 * default_dpi

# Grid with 2 columns (one per TLS view) and 5 rows: the first row shows the labels, the remaining rows the channels.
fig,axs = tc.pl.subplots(2,5,axsize=(5,4),)
tc.pl.scatter(TLS_views,'labels',position_key=['x','y'],joint=True,point_size=1, colors=labels_colors, background_color='black', ax=axs[[0],:],);
# Group marker genes into three "channels"; genes outside the listed groups become NaN through the categorical cast.
# NOTE(review): the loop variable `sub` overwrites the notebook-level `sub` defined earlier, and `k` is unused.
for k,sub in TLS_views.items():
    tc.utils.merge_annotation(sub, 'gene', result_key='channel', mapping={'Cd14_Cd68': ['Cd14','Cd68',], 'Cd3d_Cd3e_Cd3g': ['Cd3d','Cd3e','Cd3g'], 'Cd79_Jchain_Igkc':['Cd79', 'Jchain', 'Igkc']})
    sub['channel'] = sub['channel'].astype(pd.CategoricalDtype(['Cd14_Cd68','Cd3d_Cd3e_Cd3g','Cd79_Jchain_Igkc',]))
# Channel colours on the black background.
likeISH_colors = {
 'Cd14_Cd68': '#f0f',
 'Cd3d_Cd3e_Cd3g': '#ff0',
 'Cd79_Jchain_Igkc': '#066',
 }
tc.pl.scatter(TLS_views,'channel',position_key=['x','y'],joint=None,point_size=2, colors=likeISH_colors, background_color='black', ax=axs[1:5,:],);

from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
fontprops = fm.FontProperties(size=10)
# Add a scale bar spanning 100 data units (labelled '100µm') to every panel and hide the axis ticks.
for ax in axs.flatten():
    scalebar = AnchoredSizeBar(ax.transData, 100, '100µm', 'lower right', pad=0.1, color='white', frameon=False, size_vertical=2, fontproperties=fontprops)
    ax.add_artist(scalebar)
    ax.set_yticks([])
    ax.set_xticks([])

# Back to the baseline dpi for subsequent figures.
matplotlib.rcParams['figure.dpi'] = 1 * default_dpi

fig.savefig(f'{figures_folder}/mouse_cartana_v2_premalignant_TLS.pdf',bbox_inches='tight')

## Endothelial cells

In [None]:
# Molecules of the premalignant v2 samples, with unused SampleID categories removed.
sub = cartana[cartana['State'].isin(["premalignant"]) & cartana['version'].isin(["v2"])].copy()
sub['SampleID'] = sub['SampleID'].cat.remove_unused_categories()

In [None]:
%%time
# Molecule-level AnnData: no grouping key is given (obs key None), variables are genes.
sub_adata = tc.utils.dataframe2anndata(sub,None,'gene',compositional_keys=['gene'])
# Collapse the 'gene' composition to a single gene annotation per observation and drop the .obsm entry.
for c in ['gene']:
    tc.utils.get_maximum_annotation(sub_adata,c, result_key=c)
    del sub_adata.obsm[c]

In [None]:
# Gene-gene co-occurrence per sample for distances up to 20 in steps of 1; the result is stored under 'cooc'.
%time tc.tl.co_occurrence(sub_adata, 'gene', max_distance=20, delta_distance=1, result_key='cooc', sample_key='SampleID', )

In [None]:
# Genes used as co-occurrence centres (endothelial / pericyte markers, going by the output file name).
genes = ['Cdh5', 'Pecam1', 'Pdgfrb', ]
# Additional genes shown for comparison.
other_genes = ['Epcam','Cd68','Jchain','Muc2','Cd3e']
fig = tc.pl.co_occurrence(sub_adata, 'cooc', show_only_center=genes, show_only=[*genes,*other_genes],merged=False);
fig.savefig(f'{figures_folder}/mouse_cartana_Pericytes_and_Endothelial_marker_cooc_v2_premalignant.pdf',bbox_inches='tight')

# Co-occurrences

In [None]:
# Pool the labels into three compartments (epithelial, immune, stromal) and store the result under 'compartment'.
tc.utils.merge_annotation(bindata, annotation_key='labels', result_key='compartment', mapping={'epithelial': ['Epi'], 'immune': ['TNK', 'B', 'Mac', 'Mono', 'Gran', 'Mast'], 'stromal': ['Fibro', 'Endo']})

In [None]:
def annotate_epithelial_domain(adata, sample_key='sample', critical_neighbourhood_size=4.0):
    """Flag the observations whose stromal annotation coordinate exceeds 75.

    Runs `tc.tl.annotation_coordinate` on the 'compartment' annotation
    (max_distance=100, delta_distance=10), which stores its result in
    `adata.obsm['comp_dist']`, and writes the boolean flag to
    `adata.obs['epi_domain']`. `adata` is modified in place; nothing is returned.

    Parameters
    ----------
    adata
        Data object carrying a 'compartment' annotation.
    sample_key
        Passed on as the sample key of `tc.tl.annotation_coordinate`.
    critical_neighbourhood_size
        Passed on unchanged to `tc.tl.annotation_coordinate`.
    """
    tc.tl.annotation_coordinate(
        adata,
        'compartment',
        sample_key,
        result_key='comp_dist',
        max_distance=100,
        delta_distance=10,
        critical_neighbourhood_size=critical_neighbourhood_size,
        sparse=True,
        verbose=0,
    )
    stromal_coordinate = adata.obsm['comp_dist']['stromal']
    adata.obs['epi_domain'] = stromal_coordinate > 75

In [None]:
# Annotate the binned data per SampleID; note the neighbourhood size of 40 instead of the function default of 4.0.
annotate_epithelial_domain(bindata, 'SampleID', critical_neighbourhood_size=40)

In [None]:
# Bins flagged as 'epi_domain' in the premalignant v2 samples.
bindata_premalignant = bindata[bindata.obs['epi_domain'] & (bindata.obs['version'] == 'v2') & (bindata.obs['State'] == 'premalignant')].copy()
# Key under which the co-occurrence result is stored and later plotted.
analysis_key=f'cluster-labels'
# Co-occurrence of 'cluster' relative to 'labels' per sample, for distances up to 500 in steps of 20.
tc.tl.co_occurrence(bindata_premalignant, 'cluster', 'labels', sample_key='SampleID', delta_distance=20, max_distance=500, sparse=True, result_key=analysis_key, verbose=0);

In [None]:
# Show the selected monocyte and granulocyte clusters with 'Endo' as centre, using the 'log_occ' score with log base 2.
fig = tc.pl.co_occurrence(bindata_premalignant, analysis_key, score_key='log_occ', colors=cluster_colors, wspace=0.3, log_base=2, show_only=['Mono02 (Dysplasia-Associated)', 'Mono03 (Dysplasia-Associated, IFN)', 'Gran01', 'Gran02',], show_only_center=['Endo'], merged=False);
fig.savefig(f'{figures_folder}/mouse_cartana_cooc_premalignant_myeloid_cluster_from_endothelial.pdf',bbox_inches='tight')

In [None]:
# Bins flagged as 'epi_domain' in the normal v2 samples.
bindata_normal = bindata[bindata.obs['epi_domain'] & (bindata.obs['version'] == 'v2') & (bindata.obs['State'] == 'normal')].copy()
# Key under which the co-occurrence result is stored and later plotted (reuses the `analysis_key` name).
analysis_key=f'cluster-cluster'
# Cluster-cluster co-occurrence per sample, for distances up to 500 in steps of 20.
tc.tl.co_occurrence(bindata_normal, 'cluster', sample_key='SampleID', delta_distance=20, max_distance=500, sparse=True, result_key=analysis_key, verbose=0);

In [None]:
# Epithelial clusters shown both as centres and as co-occurring annotations.
main_Epi = ['Epi02 (Enterocytes)', 'Epi03 (Stem/Progenitors)', 'Epi04 (Secretory)']
fig = tc.pl.co_occurrence(bindata_normal, analysis_key, score_key='log_occ', colors=cluster_colors, wspace=0.3, log_base=2, show_only=main_Epi, show_only_center=main_Epi, merged=False);
fig.savefig(f'{figures_folder}/mouse_cartana_cooc_normal_epi_cluster_from_epi_cluster.pdf',bbox_inches='tight')

# Molecule level views

In [None]:
# Rectangular windows (in the offset-shifted coordinates) of the v2 data: one premalignant, one normal.
sub_pm = cartana[(cartana['version'] == 'v2') & (cartana['State'] == 'premalignant') & (cartana['x'] < 4000) & (cartana['y'] > 4500) & (cartana['y'] < 5500)].copy()
sub_no = cartana[(cartana['version'] == 'v2') & (cartana['State'] == 'normal') & (cartana['x'] < 1000) & (cartana['y'] > 6000) & (cartana['y'] < 7000)].copy()
# Insertion order fixes the column order of the figure below.
views = {'premalignant': sub_pm, 'normal': sub_no}

In [None]:
# Render this figure at 4x the baseline dpi; reset before saving (see below).
matplotlib.rcParams['figure.dpi'] = 4 * default_dpi

# Grid with 2 columns (one per view, the second narrower) and 9 rows: the first row shows the labels, the remaining rows the channels.
fig,axs = tc.pl.subplots(2,9,axsize=(16,4),width_ratios=(1,0.25), x_padding=0.3)
# Label scatter per view; the legend is drawn only for the second view.
for i_view, (k_view,view) in enumerate(views.items()):
    tc.pl.scatter({k_view: view},'labels',position_key=['x','y'],joint=True,point_size=1, colors=labels_colors, background_color='black', ax=axs[:1,[i_view]], legend=(i_view==1));
# Marker gene groups.
# NOTE(review): Sell_Ccr2_genes and Ptprc_Cd14_genes are not used below (these genes are listed individually in the mapping).
Mono2_genes=['Arg1','Hilpda','Nos2','Cd274','Vegfa','Trem1']
Mono3_genes=['Osm','Ifi204','Thbs1',]
Gran_genes=['S100a9','S100a8',]
Sell_Ccr2_genes=['Sell','Ccr2']
Ptprc_Cd14_genes=['Ptprc','Cd14']
# Assign each molecule to a "channel"; genes outside the listed groups become NaN through the categorical cast.
# NOTE(review): the loop variable `sub` overwrites the notebook-level `sub`, and `k` is unused.
for k,sub in views.items():
    tc.utils.merge_annotation(sub, 'gene', result_key='channel', mapping={'Arg1_Hilpda_Nos2_Cd274_Vegfa_Trem1': Mono2_genes, 'Osm_Ifi204_Thbs1': Mono3_genes, 'S100a9_S100a8': Gran_genes, 'Sell':['Sell'],'Ccr2':['Ccr2'], 'Ptprc':['Ptprc'],'Cd14':['Cd14']})
    sub['channel'] = sub['channel'].astype(pd.CategoricalDtype(['Arg1_Hilpda_Nos2_Cd274_Vegfa_Trem1','Osm_Ifi204_Thbs1','S100a9_S100a8','Sell','Ccr2','Ptprc','Cd14']))
# Channel colours on the black background.
likeISH_colors = {
 'Arg1_Hilpda_Nos2_Cd274_Vegfa_Trem1': '#0ff',
 'Osm_Ifi204_Thbs1': '#f00',
 'S100a9_S100a8': '#ff0',
 'Sell': '#0f0',
 'Ccr2': '#f0f',
 'Ptprc': '#66f',
 'Cd14': '#aaa',
 }
# Channel scatter per view in the remaining rows; again the legend is drawn only for the second view.
for i_view, (k_view,view) in enumerate(views.items()):
    tc.pl.scatter({k_view: view},'channel',position_key=['x','y'],joint=None,point_size=2, colors=likeISH_colors, background_color='black', ax=axs[1:,[i_view]], legend=(i_view==1));

from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
fontprops = fm.FontProperties(size=10)
# Add a scale bar spanning 100 data units (labelled '100µm') to every panel and hide the axis ticks.
for ax in axs.flatten():
    scalebar = AnchoredSizeBar(ax.transData, 100, '100µm', 'lower left', pad=0.1, color='white', frameon=False, size_vertical=2, fontproperties=fontprops)
    ax.add_artist(scalebar)
    ax.set_yticks([])
    ax.set_xticks([])

# Back to the baseline dpi for subsequent figures.
matplotlib.rcParams['figure.dpi'] = 1 * default_dpi

fig.savefig(f'{figures_folder}/mouse_cartana_v2_Mono2_Mono3_Gran_Sell_Ccr2_Ptprc_Cd14.pdf',bbox_inches='tight')