In [1]:
import os
from tqdm import tqdm
import pickle
import random

import pandas as pd
import numpy as np
import scipy
from sklearn.metrics.pairwise import pairwise_kernels
import matplotlib.pyplot as plt

from metaspace import SMInstance
from anndata import AnnData
from metaspace2anndata import dataset_to_anndata
import scanpy as sc
import squidpy as sq

# TODO: insert shared configuration (paths, parameters, etc.) here.

In [2]:
# Compute per-dataset spatial autocorrelation (Moran's I) for every annotation
# of each dataset that survives filtering, and store the results as a pickle.

# --- Configuration ---
date_key = '230201'

# Store in Alexandrov g drive
data_dir = '/g/alexandr/tim/metaspace_evaluation/'

store_dir = os.path.join(data_dir, date_key)

database = ('HMDB', 'v4')

filename = 'hmdb4_autocorrelation.pickle'

# Intermediate results are written here every `checkpoint_every` datasets so
# that a crash part-way through the loop does not lose all work.
checkpoint_dir = '/scratch/trose/tmp'
checkpoint_every = 100

# --- Load dataset ---
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(store_dir, 'all_datasets_mol_anndata.pickle'), "rb") as in_file:
    adata_mol = pickle.load(in_file)

# Rows (obs) of adata_mol are datasets: obs.index is used as the dataset id
# in the loop below. The scanpy helpers filter/normalize adata_mol in place.
sc.pp.filter_genes(adata_mol, min_cells=200)
sc.pp.filter_cells(adata_mol, min_genes=50)
sc.pp.normalize_total(adata_mol, target_sum=1e4)

out_dict = {}

sm = SMInstance()
counter = 0  # number of datasets processed so far


# Loop over all remaining datasets after filtering
for ds_id in tqdm(adata_mol.obs.index):

    ds = sm.dataset(id=ds_id)

    tmp_adata = dataset_to_anndata(ds, fdr=0.5, database=database)

    # Build the pixel-grid neighbourhood graph and expose it under the
    # 'connectivities' key (presumably the key sc.metrics.morans_i reads by
    # default - confirm against the scanpy documentation).
    sq.gr.spatial_neighbors(tmp_adata, coord_type='grid')
    tmp_adata.obsp['connectivities'] = tmp_adata.obsp['spatial_connectivities']

    # One Moran's I value per entry of tmp_adata.var, in var order.
    morans = sc.metrics.morans_i(tmp_adata)

    out_dict[ds_id] = {
        'xdim': max(tmp_adata.obs['x']) + 1,
        'ydim': max(tmp_adata.obs['y']) + 1,
        'nfeatures': len(tmp_adata.var.index),
        'autocorrelation': dict(zip(tmp_adata.var.index, morans))
    }

    # The counter was previously never incremented, so the modulo test was
    # always true and a checkpoint was rewritten to the same '_0' file on every
    # iteration. Count processed datasets and checkpoint every N of them.
    counter += 1
    if (counter % checkpoint_every) == 0:
        checkpoint_path = os.path.join(checkpoint_dir, filename + '_' + str(counter))
        with open(checkpoint_path, "wb") as checkpoint_file:
            pickle.dump(out_dict, checkpoint_file)


# --- Save final result ---
with open(os.path.join(store_dir, filename), "wb") as out_file:
    pickle.dump(out_dict, out_file)

100%|████████████████████████████████████████| 1065/1065 [00:09<00:00, 116.33it/s]


NameError: name 'all_results_dict' is not defined

In [3]:
# Inspect the collected results (keyed by dataset id); this renders the whole dict.
out_dict

{'2023-02-01_06h55m41s': {'xdim': 100,
  'ydim': 55,
  'nfeatures': 1065,
  'autocorrelation': {'C8H8O3+H': 0.6795107767463024,
   'C5H15NO4P[M]+': 0.6517302421013412,
   'C44H81O8P+Na': 0.8764500613891754,
   'C42H82NO8P+H': 0.07853041517157375,
   'C16H14O4+H': 0.008262937811252273,
   'C44H86NO8P+H': 0.1799852027594655,
   'C10H11NO+Na': 0.6517302421013412,
   'C37H62O10[M]+': 0.13054826712342696,
   'C17H12O8[M]+': 0.05550090589696991,
   'C29H50O2[M]+': 0.11771467241513252,
   'C8H10O3[M]+': 0.6727491874945213,
   'C21H19I4NO10[M]+': 0.0456504213518802,
   'C33H52O5+H': -3.675723756531605e-05,
   'C9H12O4[M]+': 0.5315134597468163,
   'C9H9N+Na': 0.6727491874945213,
   'C18H14O6+H': -0.0018146591614248953,
   'C41H76NO10P+NH4': 0.8764500613891754,
   'C11H12O+NH4': 0.6315479047129092,
   'C11H15NO+H': 0.6315479047129092,
   'C35H52O5+H': 0.02142261618184576,
   'C34H67NO3+H': 0.19740490967168436,
   'C41H79O8P+NH4': 0.054344984298507766,
   'C41H82NO8P+H': 0.054344984298507766,
   