In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc

import muon as mu

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import liana as li

In [3]:
from functools import reduce

In [4]:
path = os.path.join('..','..', 'data', 'sma')

## Load Prior Knowledge

### Protein-metabolite interactions

In [5]:
# Load metabolite receptor interactions
metalinksdb = pd.read_csv("resources/metalinksdb_w_references.csv")
metalinksdb['ligand'] = metalinksdb['hmdb'].replace(to_replace='"', value='', regex=True)
metalinksdb['receptor'] = metalinksdb['gene_symbol'].replace(to_replace='"', value='', regex=True)
metalinksdb = metalinksdb[['ligand', 'receptor', 'source', 'reference']]
resource = pd.read_csv(os.path.join("resources", 'MR_brain_20230808.csv'), index_col=0)
resource.head()

Unnamed: 0,ligand,receptor
1,HMDB0000112,GABRB2
3,HMDB0000216,ADRA2B
4,HMDB0000068,ADRA2B
20,HMDB0000148,GRM8
49,HMDB0000112,GABRA5


### Export PK

In [6]:
resource_out = resource.merge(metalinksdb, how='left')
resource_out.to_csv(os.path.join("..", "..", "figures", "source", "MetaboliteReceptorPK.csv"))

## Load Slides

In [7]:
slides = pd.read_csv(os.path.join(path, "sma_meta.csv"), index_col=0).index
exp = "V11L12-109" # V11T17-102 V11L12-109 V11T16-085

In [8]:
hvg = []
hvm = []
hvc = []
mdatas = {}

for slide in slides:    
    slide_path = os.path.join(path, exp, f"{exp}_{slide}", "output_data")
    mdatas[slide] = mu.read_h5mu(os.path.join(slide_path, "sma.h5mu"))
    
    print(slide)
    print(mdatas[slide])
    
    msi = mdatas[slide].mod["msi"]
    rna = mdatas[slide].mod["rna"]
    ct = mdatas[slide].mod["deconv"]
    
    # Keep most variable metabolites
    sc.pp.highly_variable_genes(msi, flavor='cell_ranger', n_top_genes=250)
    
    # Remove predictors with no variation 
    sc.pp.highly_variable_genes(rna, flavor='cell_ranger', n_top_genes=12500)
    ct.var['cv'] = ct.X.var(axis=0) / ct.X.mean(axis=0)
    ct.var['highly_variable'] = ct.var['cv'] > np.percentile(ct.var['cv'], 20)
    
    msk = (msi.var['highly_variable'].values)
    hvm.append(list(msi.var[msk].index))
    hvg.append(list(rna.var[rna.var['highly_variable']].index))
    hvc.append(list(ct.var[ct.var['highly_variable']].index))

A1
MuData object with n_obs × n_vars = 5015 × 17556
  3 modalities
    rna:	2577 x 16260
      obs:	'in_tissue', 'array_row', 'array_col', 'x', 'y', 'lesion', 'region', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_genes', 'n_counts'
      var:	'gene_ids', 'feature_types', 'genome', 'mt', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells'
      uns:	'lesion_colors', 'log1p', 'region_colors', 'spatial'
      obsm:	'spatial'
      layers:	'counts'
    msi:	2438 x 1248
      obs:	'x', 'y', 'array_row', 'array_col', 'leiden', 'n_counts', 'index_right', 'region', 'lesion'
      var:	'mean', 'std', 'mz', 'max_intensity', 'mz_raw', 'annotated'
      uns:	'leiden', 'leiden_colors', 'log1p', 'n

In [9]:
# Common highly-variable metabolite peaks
hvm = reduce(np.intersect1d, hvm)
hvg = reduce(np.intersect1d, hvg)
hvc = reduce(np.intersect1d, hvc)

In [10]:
# only keep hvg receptors
resource['receptor'] = resource['receptor'].str.title()
receptors = resource['receptor'].unique()
receptors = receptors[np.isin(receptors, hvg)]

In [11]:
# check if predictor n are somewhat comparable
print(len(hvm), len(receptors), len(hvc))

83 45 37


## Run MISTy

In [12]:
interaction_list = {}
target_list = {}
bandwidth=1000
cutoff=0.1

for slide in slides:
    mdata = mdatas[slide]
    
    msi = mdata.mod["msi"]
    msi = msi[:, hvm].copy()
    sc.pp.scale(msi, max_value=5)
    msi.X[msi.X < 0] = 0
    
    # distances of metabolties to RNA
    reference = mdata.mod["msi"].obsm["spatial"]
    
    deconv = mdata.mod["deconv"][:, hvc]
    li.ut.spatial_neighbors(deconv, bandwidth=bandwidth, cutoff=cutoff, spatial_key="spatial", reference=reference, set_diag=False, standardize=False)
    
    rna = mdata.mod["rna"]
    li.ut.spatial_neighbors(rna, bandwidth=bandwidth, cutoff=cutoff, spatial_key="spatial", reference=reference, set_diag=False, standardize=False)
    
    # Subset RNA to metabolite receptors only
    rec = rna[:, receptors].copy()

    # MISTy
    mdata.update_obs()
    
    misty = li.mt.MistyData({"intra": msi, "receptor": rec, "ct": deconv}, enforce_obs=False, obs=mdata.obs)
    misty(model="linear", verbose=True, bypass_intra=True, maskby='lesion')
    
    interaction_list[slide] = misty.uns['interactions'].copy()
    target_list[slide] = misty.uns['target_metrics'].copy()

    mdata.write_h5mu(os.path.join('results', f"{slide}_misty.h5mu"))


view intra is not a csr_matrix. Converting to csr_matrix
view ct is not a csr_matrix. Converting to csr_matrix
Now learning: Dopamine masked by lesioned: 100%|██████████| 83/83 [00:18<00:00,  4.39it/s]
view intra is not a csr_matrix. Converting to csr_matrix
view ct is not a csr_matrix. Converting to csr_matrix
Now learning: Dopamine masked by lesioned: 100%|██████████| 83/83 [00:13<00:00,  6.25it/s]
view intra is not a csr_matrix. Converting to csr_matrix
view ct is not a csr_matrix. Converting to csr_matrix
Now learning: Dopamine masked by lesioned: 100%|██████████| 83/83 [00:13<00:00,  6.30it/s]


In [13]:
interactions = pd.concat(interaction_list).reset_index(names=['slide', 'ind']).drop(columns=['ind'])
target_metrics = pd.concat(target_list).reset_index(names=['slide', 'ind']).drop(columns=['ind'])

In [14]:
interactions.to_csv(os.path.join('results', "interactions.csv"), index=False)
target_metrics.fillna(0, inplace=True)
target_metrics.to_csv(os.path.join("results", "target_metrics.csv"), index=False)

## Run Local Scores

In [15]:
for slide in slides:
    mdata = mdatas[slide]
    rna = mdata.mod["rna"]
    msi = mdata.mod["msi"]
    deconv = mdata.mod["deconv"]
    
    # interpolate
    ad = li.ut.interpolate_adata(target=msi, reference=rna, use_raw=False, spatial_key='spatial')
    
    mdata = mu.MuData({'msi': ad, 'rna':rna, 'deconv':deconv}, obsm=rna.obsm, obs=rna.obs, uns=rna.uns)
    li.ut.spatial_neighbors(mdata, bandwidth=bandwidth, cutoff=cutoff, set_diag=True)

    li.mt.bivar(mdata, 
                function_name='cosine',
                x_mod='msi', 
                y_mod='deconv',
                x_use_raw=False, 
                y_use_raw=False,
                verbose=True, 
                mask_negatives=True, 
                add_categories=True,
                n_perms=1000,
                interactions = [('Dopamine', 'MSN1'),
                                ('Dopamine', 'MSN2')],
                x_transform=sc.pp.scale,
                y_transform=sc.pp.scale,
                mod_added='msi_ct'
                )


    li.mt.bivar(mdata, 
                function_name='cosine',
                x_mod='msi', 
                y_mod='rna',
                x_use_raw=False, 
                y_use_raw=False,
                verbose=True, 
                mask_negatives=True, 
                add_categories=True,
                n_perms=1000,
                interactions = [('Dopamine', 'Drd2')],
                x_transform=sc.pp.scale,
                y_transform=sc.pp.scale,
                mod_added='lr'
                )
    
    mdata.update_obs()
    mdata.update_var()
    mdata.write_h5mu(os.path.join('results', f"{slide}_local.h5mu"))

Using provided `interactions`.
Using `.X`!
Using `.X`!
Converting to sparse csr matrix!
Using `.X`!
Converting to sparse csr matrix!


Transforming msi using scale
Transforming deconv using scale


100%|██████████| 1000/1000 [00:00<00:00, 1380.99it/s]
Using provided `interactions`.
Using `.X`!
Using `.X`!


Transforming msi using scale
Transforming rna using scale


Using `.X`!
Converting to sparse csr matrix!
100%|██████████| 1000/1000 [00:00<00:00, 8349.13it/s]
... storing 'x' as categorical
Using provided `interactions`.
Using `.X`!
Using `.X`!
Converting to sparse csr matrix!
Using `.X`!
Converting to sparse csr matrix!


Transforming msi using scale
Transforming deconv using scale


100%|██████████| 1000/1000 [00:00<00:00, 1090.35it/s]
Using provided `interactions`.
Using `.X`!
Using `.X`!


Transforming msi using scale
Transforming rna using scale


Using `.X`!
Converting to sparse csr matrix!
1 features of mat are empty, they will be removed.
100%|██████████| 1000/1000 [00:00<00:00, 5829.37it/s]
... storing 'x' as categorical
Using provided `interactions`.
Using `.X`!
Using `.X`!
Converting to sparse csr matrix!
Using `.X`!
Converting to sparse csr matrix!


Transforming msi using scale
Transforming deconv using scale


100%|██████████| 1000/1000 [00:00<00:00, 1252.91it/s]
Using provided `interactions`.
Using `.X`!
Using `.X`!


Transforming msi using scale
Transforming rna using scale


Using `.X`!
Converting to sparse csr matrix!
100%|██████████| 1000/1000 [00:00<00:00, 7474.89it/s]
... storing 'x' as categorical
