In [17]:
import os
from tqdm import tqdm
import pickle
import random

import pandas as pd
import numpy as np
import scipy
from sklearn.metrics.pairwise import pairwise_kernels
import matplotlib.pyplot as plt

from metaspace import SMInstance
from anndata import AnnData
from metaspace2anndata import dataset_to_anndata
import scanpy as sc
import squidpy as sq

# Insert config & co.

In [19]:
date_key = '230201'

# Store in Alexandrov g drive
data_dir = '/g/alexandr/tim/metaspace_evaluation/'

store_dir = os.path.join(data_dir, date_key)

database = ('HMDB', 'v4')

filename = 'hmdb4_autocorrelation.pickle'

# Load dataset
adata_mol = pickle.load(open( os.path.join(store_dir, 'all_datasets_mol_anndata.pickle'), "rb" ))

sc.pp.filter_genes(adata_mol, min_cells=200)
sc.pp.filter_cells(adata_mol, min_genes=50)
sc.pp.normalize_total(adata_mol, target_sum=1e4)

out_dict = {}

sm = SMInstance()
counter = 2001

In [20]:
out_dict = pickle.load(open(os.path.join('/scratch/trose/tmp', str(2000) + '_' + filename), "rb" ))

In [21]:
len(out_dict)

6009

In [22]:
for ds_id in adata_mol.obs.index:
    
    if ds_id not in out_dict.keys():
        
        print(ds_id)
        ds = sm.dataset(id=ds_id)
        
        try:

            tmp_adata = dataset_to_anndata(ds, fdr=0.1, database=database)
            
            sq.gr.spatial_neighbors(tmp_adata, coord_type='grid')
            tmp_adata.obsp['connectivities'] = tmp_adata.obsp['spatial_connectivities']

            out_dict[ds_id] = {
                'xdim': max(tmp_adata.obs['x']) + 1,
                'ydim': max(tmp_adata.obs['y']) + 1,
                'nfeatures': len(tmp_adata.var.index),
                'autocorrelation': {k: v for v, k in zip(sc.metrics.morans_i(tmp_adata), tmp_adata.var.index)}
            }
            
            counter += 1

            if (counter % 50) == 0:
                        pickle.dump(out_dict, 
                    open( os.path.join('/scratch/trose/tmp', str(counter) + '_' + filename), "wb" ) )
                
        except:
            print('skipping')
            continue
    
        print(counter)
        
pickle.dump(out_dict, 
            open( os.path.join(store_dir, filename), "wb" ) )

2023-02-01_06h52m46s
skipping
2022-10-25_22h26m32s
skipping
2022-09-16_10h44m35s
skipping
2022-08-29_23h04m44s
skipping
2021-09-21_23h51m11s
skipping
2021-08-25_20h59m05s
skipping
2021-08-05_00h51m27s
skipping
2021-08-02_09h45m30s
skipping
2021-07-22_13h26m26s
skipping
2021-07-12_17h34m50s
skipping
2021-07-06_15h53m32s
skipping
2021-06-14_14h31m25s
skipping
2021-05-30_18h51m15s
skipping
2021-04-27_18h46m55s
skipping
2021-04-27_18h45m26s
skipping
2021-01-29_16h20m38s
skipping
2020-10-21_19h54m11s
skipping
2019-12-05_11h13m06s
skipping
2019-09-27_19h28m56s
skipping
2019-09-27_19h28m08s
skipping
2019-09-27_19h27m04s
skipping
2019-09-27_19h26m04s
skipping
2019-09-27_19h24m56s
skipping
2019-08-18_19h50m58s
skipping
2019-08-18_15h39m14s
skipping
2019-08-15_15h34m59s
skipping
2019-08-12_11h38m59s
skipping
2019-03-19_06h36m18s
skipping
2019-03-19_06h02m30s
skipping
2019-02-27_11h03m02s
skipping
2018-11-30_03h20m05s
skipping
2018-11-30_03h16m06s
skipping
2018-10-10_16h20m23s
skipping
2018-08-03

100%|████████████████████████████████████████| 125/125 [00:03<00:00, 34.55it/s]


skipping
2017-02-09_03h00m31s
skipping
2017-03-17_08h39m50s
skipping
2017-02-17_04h13m17s
skipping
2017-05-07_08h51m27s
skipping
2017-05-07_09h41m33s
skipping
2016-10-01_12h25m48s
skipping
2016-10-01_12h05m45s
skipping
2016-12-13_13h53m14s
skipping
2016-12-13_13h54m24s
skipping
2016-12-13_13h54m35s
skipping
2016-12-13_13h54m45s
skipping
2016-12-13_13h54m39s
skipping
2017-05-22_06h33m10s
skipping
2017-05-22_06h39m58s
skipping
2017-10-16_17h02m38s
skipping
2017-10-24_13h41m42s
skipping
2017-07-31_19h20m33s
skipping
2017-05-22_14h31m10s
skipping
2017-05-22_14h12m32s
skipping
2018-02-23_09h07m57s


100%|████████████████████████████████████████| 8/8 [00:00<00:00, 19.46it/s]


skipping
2018-02-23_11h49m27s


100%|████████████████████████████████████████| 57/57 [00:03<00:00, 14.36it/s]


skipping
2018-04-26_11h23m01s


100%|████████████████████████████████████████| 423/423 [00:06<00:00, 64.15it/s]


2002
2018-04-26_11h22m02s


100%|████████████████████████████████████████| 435/435 [00:04<00:00, 105.22it/s]


2003
2018-04-26_10h00m51s


100%|████████████████████████████████████████| 6/6 [00:00<00:00, 15.76it/s]


2004
2018-04-26_09h41m37s


100%|████████████████████████████████████████| 19/19 [00:01<00:00, 13.25it/s]


2005
2018-04-24_11h38m35s


100%|████████████████████████████████████████| 261/261 [00:05<00:00, 47.89it/s]


2006
2018-04-24_18h54m28s


100%|████████████████████████████████████████| 2/2 [00:00<00:00,  7.64it/s]


2007
2018-04-24_12h25m13s


100%|████████████████████████████████████████| 423/423 [00:06<00:00, 63.43it/s] 


2008
2018-04-24_12h22m14s


100%|████████████████████████████████████████| 486/486 [00:05<00:00, 96.89it/s] 


2009
2018-04-24_12h18m42s


100%|████████████████████████████████████████| 675/675 [00:05<00:00, 131.62it/s]


2010
2018-04-24_12h12m43s


100%|████████████████████████████████████████| 654/654 [00:05<00:00, 128.37it/s]


2011
2018-04-24_12h05m52s


100%|████████████████████████████████████████| 571/571 [00:04<00:00, 131.59it/s]


2012
2018-04-24_11h58m51s


100%|████████████████████████████████████████| 603/603 [00:05<00:00, 119.34it/s]


2013
2018-04-24_11h49m16s


100%|████████████████████████████████████████| 544/544 [00:04<00:00, 109.18it/s]


2014
2018-05-04_19h35m24s


100%|████████████████████████████████████████| 122/122 [00:03<00:00, 32.14it/s]


2015
2018-05-04_17h29m28s


100%|████████████████████████████████████████| 112/112 [00:03<00:00, 30.55it/s]


2016
2016-11-15_12h00m00s


100%|████████████████████████████████████████| 116/116 [00:04<00:00, 28.04it/s]


2017
2016-10-10_10h50m00s


100%|████████████████████████████████████████| 140/140 [00:03<00:00, 37.11it/s]


2018
2016-10-10_10h50m02s


100%|████████████████████████████████████████| 61/61 [00:03<00:00, 15.77it/s]


2019
2016-11-15_12h49m00s


100%|████████████████████████████████████████| 30/30 [00:03<00:00,  8.38it/s]


2020
2016-10-10_10h50m03s


100%|████████████████████████████████████████| 94/94 [00:03<00:00, 24.75it/s]


2021
2016-10-10_10h50m01s


100%|████████████████████████████████████████| 25/25 [00:02<00:00, 10.99it/s]


2022
2017-03-22_10h45m50s


100%|████████████████████████████████████████| 25/25 [00:02<00:00, 11.66it/s]


2023
2018-05-04_10h15m29s


100%|████████████████████████████████████████| 6/6 [00:00<00:00, 14.14it/s]


2024
2018-05-04_03h10m28s


100%|████████████████████████████████████████| 4/4 [00:00<00:00, 11.64it/s]


2025
2018-05-04_03h04m13s


100%|████████████████████████████████████████| 18/18 [00:01<00:00, 15.10it/s]


2026
2018-05-03_17h34m18s


100%|████████████████████████████████████████| 22/22 [00:01<00:00, 11.51it/s]


2027
2018-05-03_15h21m06s


100%|████████████████████████████████████████| 14/14 [00:00<00:00, 14.37it/s]


2028
2018-05-03_15h47m02s


100%|████████████████████████████████████████| 14/14 [00:01<00:00, 13.25it/s]


2029
2018-05-03_15h58m08s


100%|████████████████████████████████████████| 12/12 [00:00<00:00, 17.27it/s]


2030
2018-04-25_09h27m43s


100%|████████████████████████████████████████| 15/15 [00:00<00:00, 15.43it/s]


2031
2018-04-27_16h56m19s


100%|████████████████████████████████████████| 14/14 [00:01<00:00, 13.19it/s]


2032
2018-04-27_17h39m20s


100%|████████████████████████████████████████| 12/12 [00:00<00:00, 17.71it/s]


2033
2018-04-27_17h26m55s


100%|████████████████████████████████████████| 13/13 [00:00<00:00, 14.22it/s]


2034


In [7]:
ds_id

'2023-02-01_06h52m46s'

In [13]:
sm = SMInstance()
ds = sm.dataset(id='2023-02-01_06h52m46s')

In [15]:
ds.results(fdr=0.1)