# Call Annotations

In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import liana as li

In [2]:
from utils import run_rf_auc

In [3]:
# Location of the Wu et al. data, relative to this notebook.
data_dir = os.path.join(os.pardir, os.pardir, 'data', 'wu_et_al')

# Datasets to evaluate.
# 'CID4465' and 'CID4290' are left out: both are basically cancer only.
dataset_names = [
    '1160920F',
    'CID44971',
    'CID4535',
    '1142243F',
]

In [4]:
# Collect the 'name' column of liana's bivariate function listing as an array.
function_names = li.mt.bivar.show_functions().loc[:, 'name'].values

In [5]:
# Display the collected function names.
function_names

array(['pearson', 'spearman', 'cosine', 'jaccard', 'product',
       'norm_product', 'morans', 'masked_spearman'], dtype=object)

Run every bivariate function on all datasets

In [8]:
# Per-dataset evaluation results, keyed by dataset name.
performances = {}

# Make sure the results folder exists before any dataset is evaluated.
# NOTE(review): nothing in this cell writes into it directly -- presumably
# `run_rf_auc` does; confirm against `utils`.
os.makedirs(os.path.join(data_dir, 'results'), exist_ok=True)

for dataset_name in dataset_names:
    print(f"Running {dataset_name}")
    adata = sc.read_h5ad(os.path.join(data_dir, f"{dataset_name}.h5ad"))

    # Stored alongside the scores; presumably read back by `run_rf_auc` -- TODO confirm.
    adata.uns['function_names'] = function_names

    # to binary: missing annotations become 'Artefact', then a row is labelled 1
    # when its annotation contains 'cancer' and 0 otherwise.
    # The filled column is assigned back as a whole instead of through chained
    # indexing (`obs[col][mask] = value`), which can write to a temporary copy
    # and does nothing under pandas Copy-on-Write.
    classification = adata.obs['Classification']
    if (isinstance(classification.dtype, pd.CategoricalDtype)
            and 'Artefact' not in classification.cat.categories):
        # A categorical column rejects values that are not among its categories.
        classification = classification.cat.add_categories('Artefact')
    adata.obs['Classification'] = classification.fillna('Artefact')
    adata.obs['spot_label'] = adata.obs['Classification'].str.contains('cancer').astype(int)

    # Preprocess
    sc.pp.filter_cells(adata, min_genes=400)
    sc.pp.filter_genes(adata, min_cells=20)

    # Keep a copy of the matrix as it is before normalisation and log-transform.
    adata.layers["counts"] = adata.X.copy()

    sc.pp.normalize_total(adata, inplace=True)
    sc.pp.log1p(adata)

    # Run all functions
    for function_name in function_names:
        print(f"Running {function_name}")

        # Only the two product-based functions request `standardize=True`.
        standardize = function_name in ('product', 'norm_product')

        # Rebuilt on every iteration so the neighbour graph always matches the
        # `standardize` setting of the function that is about to run.
        li.ut.spatial_neighbors(adata, set_diag=True, bandwidth=150, cutoff=0.1, standardize=standardize)

        li.mt.lr_bivar(adata,
                       function_name=function_name,
                       obsm_added=function_name,
                       use_raw=False,
                       verbose=True,
                       n_perms=None,
                       )

    # eval LR basis
    # `run_rf_auc` is expected to leave its result in `adata.uns['performance']`.
    run_rf_auc(adata, dataset_name)
    performances[dataset_name] = adata.uns['performance']


In [34]:
# Combine the per-dataset results into a single table, with the dataset name as
# the outer index level, and write it next to the notebook.
performance = pd.concat(
    list(performances.values()),
    keys=list(performances.keys()),
    names=['dataset_name', None],
)
performance.to_csv("annotation_results.csv")