In [1]:
import os
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.signal
from sklearn.metrics.pairwise import pairwise_kernels
from tqdm import tqdm

from anndata import AnnData
from metaspace import SMInstance
from metaspace2anndata import dataset_to_anndata

In [2]:
# Key naming the sub-directory that holds this evaluation run.
date_key = '230201'

# Store in Alexandrov g drive
data_dir = '/g/alexandr/tim/metaspace_evaluation/'

# All artefacts of this run are read from / written to this directory.
store_dir = os.path.join(data_dir, date_key)

# Create the run directory if it is missing. exist_ok=True makes the cell
# safe to re-run and avoids listing the whole parent directory just to test
# for a single entry. Note that makedirs also creates missing parents.
os.makedirs(store_dir, exist_ok=True)

In [3]:
# METASPACE client, constructed without arguments.
sm = SMInstance()

# Load the cached dataset list from the run directory. The context manager
# closes the file handle deterministically (the original left it open).
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load files that were written by a trusted source.
with open(os.path.join(store_dir, 'all_datasets.pickle'), 'rb') as datasets_file:
    dss = pickle.load(datasets_file)

In [7]:
# Output sub-directories for the colocalization tables and the AnnData
# pickles. exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError as soon as the directories already exist.
for output_subdir in ('sl_coloc', 'sl_anndata'):
    os.makedirs(os.path.join(store_dir, output_subdir), exist_ok=True)

In [None]:
# Annotation database to evaluate, given as a (name, version) tuple.
database = ('SwissLipids', '2018-02-02')

# Selection thresholds used below (previously inlined as magic numbers).
FDR = 0.1               # FDR cutoff passed to every METASPACE query in this cell
MIN_ANNOTATIONS = 100   # minimum number of rows in the results table
MIN_PIXELS = 1000       # minimum number of pixels in an ion image
MIN_SIDE = 20           # minimum ion image extent along each of the two axes

# Output locations: one pickle per dataset in each directory.
coloc_dir = os.path.join(store_dir, 'sl_coloc')
anndata_dir = os.path.join(store_dir, 'sl_anndata')

for ds in tqdm(dss):
    # Filter for datasets with SwissLipids annotation
    if database not in [(x.name, x.version) for x in ds.database_details]:
        continue

    coloc_path = os.path.join(coloc_dir, ds.id + '.pickle')
    anndata_path = os.path.join(anndata_dir, ds.id + '.pickle')

    # Skip datasets whose two outputs both exist already; if either one is
    # missing, both are recomputed. (The original spelled this test with
    # `not is`, which is a SyntaxError, and re-listed both output directories
    # for every dataset.)
    if os.path.exists(coloc_path) and os.path.exists(anndata_path):
        continue

    # Download results
    res = ds.results(fdr=FDR, database=database)

    # Only consider datasets with at least 100 annotations
    if res.shape[0] < MIN_ANNOTATIONS:
        continue

    # Download all annotation images
    aai = ds.all_annotation_images(fdr=FDR,
                                   database=database,
                                   only_first_isotope=True,
                                   scale_intensity=False,
                                   hotspot_clipping=False)

    # Guard against an empty image set before indexing aai[0] below.
    if len(aai) == 0:
        continue

    # Only consider images with at least 1000 pixels and 20x20 dimensions.
    # The first image is used as the reference for the size check
    # (presumably all images of a dataset share one shape -- TODO confirm).
    first_image = aai[0]._images[0]
    if (first_image.size < MIN_PIXELS
            or first_image.shape[0] < MIN_SIDE
            or first_image.shape[1] < MIN_SIDE):
        continue

    # Median filter (3x3 kernel) each ion image, then flatten it so that
    # every row of ion_array is one ion.
    ion_array = np.array([scipy.signal.medfilt2d(x._images[0],
                                                 kernel_size=3).flatten()
                          for x in aai])

    # Row/column labels of the colocalization table: formula + adduct.
    ion_labels = [x.formula + x.adduct for x in aai]

    # Save pairwise cosine colocalization between ions as a dataframe
    coloc_df = pd.DataFrame(pairwise_kernels(ion_array, metric='cosine'),
                            columns=ion_labels,
                            index=ion_labels)
    coloc_df.to_pickle(coloc_path)

    # Create AnnData object
    adata = dataset_to_anndata(ds=ds,
                               database=database,
                               fdr=FDR,
                               results=res,
                               all_annotation_images=aai)

    # Context manager so the output file is flushed and closed right away.
    with open(anndata_path, 'wb') as anndata_file:
        pickle.dump(adata, anndata_file)
                

  6%|▌         | 433/7659 [16:22<3:19:36,  1.66s/it]
  0%|                                        | 0/159 [00:00<?, ?it/s][A
  1%|▎                                       | 1/159 [00:03<07:56,  3.01s/it][A
  2%|▊                                       | 3/159 [00:03<02:55,  1.12s/it][A
100%|████████████████████████████████████████| 159/159 [00:04<00:00, 37.97it/s]A
  6%|▌         | 434/7659 [16:32<6:48:17,  3.39s/it]
  0%|                                        | 0/264 [00:00<?, ?it/s][A
  0%|▏                                       | 1/264 [00:04<18:05,  4.13s/it][A
 34%|█████████████▋                          | 90/264 [00:04<00:05, 29.30it/s][A
100%|████████████████████████████████████████| 264/264 [00:04<00:00, 58.22it/s][A
  6%|▌         | 435/7659 [16:43<10:19:14,  5.14s/it]
  0%|                                        | 0/274 [00:00<?, ?it/s][A
  0%|▏                                       | 1/274 [00:04<18:17,  4.02s/it][A
  1%|▌                                       | 4/27