# Mx_Coculture: Comparison between Python and R

In this notebook, I create similar plots to the ones shown by Molenaar et al.
 

In [None]:
import platform
import os
import multiprocessing
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc
import seaborn as sns
from sklearn.cluster import KMeans
import re
import outer_spacem as osm
import sys
sys.path.append('/home/mklein/spacem')
sys.path.append('/Volumes/mklein/spacem')
sys.path.append('/home/mklein/FDA_project')
from src.correction import *
from src.evaluation import intermixing, MetaboliteAnalysis

%matplotlib inline

In [None]:
# Parameters
# Notebook-level configuration. Only `target_path` and `condition_name` are
# referenced by the cells visible below; the remaining values are not used in
# this part of the notebook (presumably shared with the other pipeline
# notebooks listed in `notebooks` — TODO confirm).
# NOTE(review): the paths are absolute and machine-specific.
source_path = "/home/mklein/Raw Data/Coculture"
# Directory containing the `dataset_threshold_0.1` folder with the .h5ad files.
target_path = "/home/mklein/FDA_project/data/Mx_Co_Cultured"
# Column used to colour the UMAP plots and passed to the intermixing evaluation.
condition_name = "celltype"
well_name = "rowcol"
deconv_default_min_overlap = 0.3
analysis_path = "/home/mklein/FDA_project/analysis/Mx_Coculture"
notebooks = [
    "pipeline_01_correction.ipynb",
    "pipeline_02_processing.ipynb",
    "pipeline_03_evaluation.ipynb",
]
project = "Mx_Coculture"


Loading the uncorrected and ISM-corrected dataset from file. Additionally, loading the metadata CSV file to filter out excluded wells.

In [None]:
# Both AnnData files live in the same thresholded dataset folder.
# `ad.read` is a deprecated alias of `ad.read_h5ad` (removed in recent anndata
# releases), so the explicit reader is used here.
dataset_dir = os.path.join(target_path, "dataset_threshold_0.1")
adata = ad.read_h5ad(os.path.join(dataset_dir, "cells_spatiomolecular_adata.h5ad"))
adata_cor = ad.read_h5ad(os.path.join(dataset_dir, "cells_spatiomolecular_adata_corrected.h5ad"))

In [None]:
# Molecules (var_names) and cells (obs_names) that are present in BOTH the
# uncorrected and the corrected dataset; used below to align the two objects.
included_molecules = adata.var_names.intersection(adata_cor.var_names)
included_cells = adata.obs_names.intersection(adata_cor.obs_names)

def subset_molecules(adata):
    """Return a copy of ``adata`` restricted to the shared cells and molecules.

    The selection uses the notebook-level ``included_cells`` (rows) and
    ``included_molecules`` (columns). Despite its name, the function subsets
    both axes. The input object is not modified; an independent copy of the
    selection is returned.
    """
    shared_selection = adata[included_cells, included_molecules]
    return shared_selection.copy()

# Restrict both datasets to the shared cells and molecules. Both are indexed
# with the same `included_cells` / `included_molecules`, so the two printed
# shapes are expected to be identical.
adata = subset_molecules(adata)
adata_cor = subset_molecules(adata_cor)

print(adata.shape)
print(adata_cor.shape)

First of all, the loaded datasets are filtered:

- cells need non-zero intensities for at least 5 ions (`min_genes=5`).
- ions need non-zero intensities for at least 1 cell (`min_cells=1`).

After that, the sets are preprocessed in different ways:

- intensities are normalized to TIC and/or log-transformed (log(x+1)), or scaled to zero mean and unit variance

Note that both datasets were already subset to contain the same ions and cells (intersection) in the previous cell, i.e. before the filtering described here.

In [None]:
def preprocess(adata, min_ions=5, min_cells=1):
    """Filter ``adata`` in place and attach the intensity layers used downstream.

    Steps, in order:

    1. Set ``adata.obs['well']`` to ``'dataset'`` for every cell.
    2. Filter cells and ions with ``sc.pp.filter_cells`` /
       ``sc.pp.filter_genes`` using the given thresholds.
    3. Store the filtered object in ``adata.raw`` and a copy of the
       intensities in ``layers['raw_counts']``.
    4. Scale ``adata.X`` in place with ``sc.pp.scale`` and expose the result
       as ``layers['scaled_counts']``.
    5. Add ``norm_counts`` / ``1e4_norm_counts`` (``sc.pp.normalize_total`` on
       ``raw_counts`` with ``target_sum=None`` / ``1e4``) and the ``log1p``
       versions ``log_raw_counts``, ``log_norm_counts`` and
       ``1e4_log_norm_counts``.
    6. Write the per-ion median and mean of ``adata.X`` to
       ``var['median_intensity']`` and ``var['mean_intensity']``.

    Parameters
    ----------
    adata : AnnData
        Dataset to preprocess; modified in place.
    min_ions : int, default 5
        Forwarded to ``sc.pp.filter_cells`` as ``min_genes``.
    min_cells : int, default 1
        Forwarded to ``sc.pp.filter_genes`` as ``min_cells``.

    Returns
    -------
    None
    """
    adata.obs['well'] = 'dataset'
    sc.pp.filter_cells(adata, min_genes=min_ions)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    adata.raw = adata
    adata.layers["raw_counts"] = adata.X.copy()

    # Scale X once, in place. The previous code additionally ran
    # `sc.pp.scale(adata, layer='raw_counts', copy=True).X`, which scaled the
    # *layer* of a full copy but then read the copy's `.X` -- i.e. exactly the
    # matrix produced by this call. Reusing X directly yields the same values
    # without copying and re-scaling the whole object.
    sc.pp.scale(adata)

    adata.layers["norm_counts"] = sc.pp.normalize_total(
        adata, layer='raw_counts', target_sum=None, inplace=False
    )['X']
    adata.layers["1e4_norm_counts"] = sc.pp.normalize_total(
        adata, layer='raw_counts', target_sum=1e4, inplace=False
    )['X']
    adata.layers["scaled_counts"] = adata.X

    adata.layers["log_raw_counts"] = sc.pp.log1p(adata.layers["raw_counts"], copy=True)
    adata.layers["log_norm_counts"] = sc.pp.log1p(adata.layers["norm_counts"], copy=True)
    adata.layers["1e4_log_norm_counts"] = sc.pp.log1p(adata.layers["1e4_norm_counts"], copy=True)

    # X and the 'scaled_counts' layer refer to the same matrix from here on.
    adata.X = adata.layers["scaled_counts"]

    # NOTE(review): these statistics are computed on the scaled matrix, not on
    # the raw intensities -- confirm this is intended.
    adata.var['median_intensity'] = np.median(adata.X, axis=0)
    adata.var['mean_intensity'] = np.mean(adata.X, axis=0)
    
    
# `preprocess` modifies its argument in place. Each dataset is filtered
# independently, so the shapes printed below may differ between the two.
preprocess(adata)
preprocess(adata_cor)

print(adata.shape)
print(adata_cor.shape)

In [None]:
def dimred_umap(adata, layer=None, min_dist=0.5, point_size=15):
    """Compute PCA, a neighbour graph and a 2-D UMAP for ``adata`` and plot it.

    The embedding is computed on ``adata.X``. When ``layer`` is given, ``X`` is
    temporarily replaced by ``adata.layers[layer]``; the previous ``X`` is kept
    in ``adata.layers['default_X']`` and put back afterwards -- also when one
    of the scanpy calls raises, so a failed run cannot leave ``X`` pointing at
    the temporary layer.

    Parameters
    ----------
    adata : AnnData
        Dataset to embed; PCA, neighbours and UMAP results are written to it
        by the scanpy calls.
    layer : str or None, default None
        Name of the layer to embed instead of the current ``X``.
    min_dist : float, default 0.5
        Forwarded to ``sc.tl.umap``.
    point_size : int, default 15
        NOTE(review): accepted for interface compatibility but currently not
        used by the function body.

    Returns
    -------
    None
        The UMAP is drawn via ``sc.pl.umap``, coloured by the notebook-level
        ``condition_name``.
    """
    use_layer = layer is not None
    if use_layer:
        # Remember the current X so it can be restored after the embedding.
        adata.layers['default_X'] = adata.X

    try:
        if use_layer:
            adata.X = adata.layers[layer]

        sc.pp.pca(adata)
        sc.pp.neighbors(adata, n_neighbors=200, metric='cosine')
        sc.tl.umap(adata, min_dist=min_dist, spread=2.0, random_state=1, n_components=2)
        sc.pl.umap(adata, color=[condition_name], palette='cividis')
    finally:
        if use_layer:
            adata.X = adata.layers['default_X']

In [None]:
def intermixing_layer(adata, adata_cor, condition_name, measures = ['X_pca', 'X_umap'], layer=None):
    """Run ``intermixing`` on the uncorrected and the corrected dataset.

    When ``layer`` is given, ``X`` of both objects is temporarily replaced by
    that layer for the duration of the call. The previous ``X`` is kept in
    ``layers['default_X']`` of each object and restored afterwards, including
    when ``intermixing`` (or the layer lookup) raises.

    Parameters
    ----------
    adata : AnnData
        Uncorrected dataset, passed to ``intermixing`` under the key
        ``'uncorrected'``.
    adata_cor : AnnData
        Corrected dataset, passed under the key ``'ISM correction'``.
    condition_name : str
        Forwarded to ``intermixing`` as ``condition_name``.
    measures : list of str, default ``['X_pca', 'X_umap']``
        Forwarded to ``intermixing`` as ``measures``. The default list is
        shared between calls and is not modified here.
    layer : str or None, default None
        Layer to use as ``X`` during the evaluation.

    Returns
    -------
    The value returned by ``intermixing`` (previously computed but discarded).
    """
    use_layer = layer is not None
    if use_layer:
        # Remember the current X of both objects before swapping in the layer.
        adata.layers['default_X'] = adata.X
        adata_cor.layers['default_X'] = adata_cor.X

    try:
        if use_layer:
            adata.X = adata.layers[layer]
            adata_cor.X = adata_cor.layers[layer]

        return intermixing(
            {'uncorrected': adata, 'ISM correction': adata_cor},
            condition_name = condition_name,
            measures = measures,
        )
    finally:
        if use_layer:
            adata.X = adata.layers['default_X']
            adata_cor.X = adata_cor.layers['default_X']
    

As in the manuscript of Molenaar et al., the data are scaled to zero mean and unit variance.

In [None]:
# Embed both datasets using their 'scaled_counts' layer (each call also draws
# a UMAP coloured by `condition_name`), then run the intermixing evaluation
# with the `X_pca` and `X_umap` measures on the same layer.
dimred_umap(adata, layer='scaled_counts')
dimred_umap(adata_cor, layer='scaled_counts')
intermixing_layer(adata, adata_cor, condition_name, measures = ['X_pca', 'X_umap'], layer='scaled_counts')

In [None]:
# Re-draw the UMAPs computed above with explicit titles: "none" for the
# uncorrected dataset and "ISM (non-supervised)" for the corrected one
# (the titles presumably name the correction applied — TODO confirm).
sc.pl.umap(adata, color=[condition_name], palette='cividis', title="none")
sc.pl.umap(adata_cor, color=[condition_name], palette='cividis', title="ISM (non-supervised)")