In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import decoupler as dc
import mudata as mu
import liana as li

In [None]:
from liana.funcomics import obsm_to_adata

In [None]:
from liana.mt.sp import lr_bivar

#### Load data

In [None]:
# Data folder for the slides. It is defined in this cell because this is the
# first place `data_dir` is used; without it a fresh kernel run top-to-bottom
# fails here with NameError (the only other definition comes much later).
data_dir = os.path.join('..', '..', 'data', 'heart_visium')

# Load a single slide for the interactive exploration below.
adata = sc.read_h5ad(os.path.join(data_dir, 'AKK002_157781.h5ad'))

In [None]:
# Turn the 'compositions' entry of adata.obsm into its own object so that its
# columns can be plotted by name (e.g. color="Fib" in the cells below).
# NOTE(review): presumably this keeps the spatial coordinates of `adata` — confirm.
comps = obsm_to_adata(adata, 'compositions')

In [None]:
# Spatial plot of the "Fib" column of `comps`, using squidpy's default rendering.
sq.pl.spatial_scatter(comps, color="Fib")

In [None]:
# Same feature as above, but with shape=None and an explicit marker size of 10.
# NOTE(review): presumably this draws plain scatter points instead of Visium
# spot shapes — confirm against the squidpy documentation.
sq.pl.spatial_scatter(comps, shape=None, color="Fib", size=10)

#### Filter & normalize

In [None]:
# Basic QC and normalisation. None of the return values are captured, so these
# calls are relied on to modify `adata` itself; re-running this cell therefore
# applies normalisation and log1p a second time — reload `adata` first.
# NOTE: `comps` was built before this filtering, so it can contain observations
# that are removed from `adata` here.
sc.pp.filter_cells(adata, min_genes=400)  # drop observations with fewer than 400 detected genes
sc.pp.filter_genes(adata, min_cells=5)  # drop genes detected in fewer than 5 observations
sc.pp.normalize_total(adata, target_sum=1e4)  # scale each observation to 10,000 total counts
sc.pp.log1p(adata)  # log(1 + x) transform of the normalised values

#### Get Spatial Neighbors

In [None]:
# Compute the spatial neighbourhood for `adata` with liana, using a bandwidth
# of 100 and a cutoff of 0.1. The return value is not captured, so the result
# is expected to be stored on `adata`.
# NOTE(review): units of `bandwidth` and the exact meaning of `cutoff` are not
# visible here — confirm against the liana documentation.
li.mt.spatial_neighbors(adata, bandwidth=100, cutoff=0.1)

#### Infer LR

In [None]:
# Run liana's bivariate ligand-receptor scoring on `adata` with the
# "masked_pearson" function, reading from the processed matrix (use_raw=False).
# The return value is not assigned: as the last expression of the cell it is
# only displayed, and any stored results must live on `adata`.
# NOTE(review): pvalue_method=None presumably skips p-value computation, and
# expr_prop=0.1 presumably is a minimum expression proportion — confirm.
lr_bivar(adata,
        function_name="masked_pearson",
        expr_prop=0.1,
        pvalue_method=None, 
        use_raw=False,
)

#### Create pipeline

In [1]:
import os
import numpy as np
import pandas as pd
import logging

from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression # TODO: replace with RF
from sklearn.metrics import r2_score, mean_squared_error

import scanpy as sc
import liana as li

In [2]:
from utils import load_prep_slide, _evaluate_regression

In [3]:
# Folder containing the .h5ad slides, relative to the notebook's working directory.
data_dir = os.path.join('..', '..', 'data', 'heart_visium')

In [4]:
# Collect the file names of all .h5ad datasets in `data_dir`.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the processing order (and any positional subset taken from this list)
# reproducible across machines and runs.
dataset_names = sorted(f for f in os.listdir(data_dir) if f.endswith('.h5ad'))

In [16]:
# Restrict the run to the first three datasets.
# NOTE(review): this cell's execution count (16) is out of sequence with its
# neighbours, so it was run after the cells that follow it; presumably a
# temporary subset for quick iteration — remove it for the full benchmark.
dataset_names = dataset_names[0:3]

In [5]:
# Benchmark every scoring function liana lists whose name does not contain
# 'masked', plus 'masked_spearman' as the single masked variant.
available_functions = li.mt.sp.show_functions()['name']
is_masked = available_functions.str.contains('masked')
function_names = [*available_functions[~is_masked], 'masked_spearman']

In [6]:
# Regressor passed to the evaluation below: plain linear regression with
# default parameters. The Random Forest configuration is kept commented out
# as the intended replacement (see the TODO on the LinearRegression import).
# regressor = RandomForestRegressor(n_estimators=100, oob_score=True, n_jobs=-1, random_state=1337)
regressor = LinearRegression()

In [7]:
# Accumulator for the per-(function, dataset) evaluation frames appended by the
# benchmarking loop; re-run this cell before re-running the loop so that old
# entries are not kept.
results = []

In [17]:
# Benchmark: for every scoring function and every dataset, evaluate how well
# the local scores predict the compositions with `regressor`.
#
# The accumulator is reset here so the cell is idempotent: re-running it no
# longer appends a second copy of every evaluation, and it still works after
# `results` has been rebound to the concatenated DataFrame further down.
results = []

for function_name in function_names:
    print(f'Running {function_name}')

    for dataset_name in dataset_names:
        print(f'Running {dataset_name}')

        # Load and preprocess the slide for this scoring function.
        adata = load_prep_slide(data_dir, dataset_name, function_name)

        # Targets are the compositions, features are the local scores; both
        # are read from adata.obsm and converted with `.values`.
        y = adata.obsm['compositions'].values
        X = adata.obsm['local_scores'].values

        # Evaluate and keep the resulting frame; the frames are concatenated
        # into a single table after the loop.
        eval_df = _evaluate_regression(X, y, dataset_name, function_name, regressor)
        results.append(eval_df)
        


Running pearson
Running AKK001_157785.h5ad
Running AKK002_157779.h5ad
Running AKK002_157781.h5ad
Running spearman
Running AKK001_157785.h5ad
Running AKK002_157779.h5ad
Running AKK002_157781.h5ad
Running cosine
Running AKK001_157785.h5ad
Running AKK002_157779.h5ad
Running AKK002_157781.h5ad
Running jaccard
Running AKK001_157785.h5ad
Running AKK002_157779.h5ad
Running AKK002_157781.h5ad
Running morans
Running AKK001_157785.h5ad




Running AKK002_157779.h5ad




Running AKK002_157781.h5ad




Running masked_spearman
Running AKK001_157785.h5ad
Running AKK002_157779.h5ad
Running AKK002_157781.h5ad


In [19]:
# Stack the per-run evaluation frames into one table.
# NOTE: this rebinds `results` from a list of frames to a single DataFrame, so
# the cell cannot be run twice in a row — rebuild the list first.
results = pd.concat(results)

In [21]:
# Write the combined evaluation table to 'results.csv' in the current working
# directory, without the index column; an existing file is overwritten.
results.to_csv('results.csv', index=False)