In [2]:
%load_ext autoreload
%aimport outer_spacem

%autoreload 1


import os

import pandas as pd
import numpy as np
from pathlib import Path

from SpaceM.lib.modules.metaspace_annotation import MetaSpaceAnnotationsLoader
# from SpaceM.lib.datastructures.project_config import read_project_config
from SpaceM.lib.datastructures.project_config import ProjectConfig
from SpaceM.lib.datastructures.ion_image import write_ion_metadata
import scanpy as sc


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Pixel analysis part 1
# Download datasets from Metaspace

## Set paths

### Input
`metadata_path`: metadata table (I prefer downloaded from Metaspace)  
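Needs to have the following columns:  
`datasetId` - Metaspace id  
`datasetName` - name of the dataset  
`slide` and `well` - slide and well identifiers, used to build the download directory of each dataset (`slide_<slide>/W<well>`)  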

All other columns are optional, generally useful to have "condition" column or something similar used for comparison in the next steps of analysis  

The script adds an additional column, `metaspace_download_dir_path`, to the metadata table and saves the updated metadata to the analysis directory at `metadata_save_path`.

### Output
`data_dir`: general root dir for storing data, including downloaded Metaspace datasets and plots  
`metaspace_data_dir`: directory where ion images and metadata will be stored  
`plots_path`: directory for plots  (ion images, cell masks, plots from scanpy analysis)

In [3]:
# Root folder of the project; every output of this notebook is written below it.
data_dir = Path(r"/Users/alberto-mac/EMBL_ATeam/projects/gastrosome_compare_conditions")

# Input: metadata table of the datasets to analyse.
metadata_path = Path(r"/Users/alberto-mac/EMBL_ATeam/projects/gastrosome_compare_conditions/metaspace_datasets.csv")

# Outputs: everything produced by this analysis shares one sub-folder of data_dir.
analysis_dir = data_dir / "pixel_analysis_reannotated"
metaspace_data_dir = analysis_dir / "metaspace_data"
metadata_save_path = analysis_dir / "metaspace_datasets_paths.csv"
plots_path = analysis_dir / "plots"

# Create the output folders up front (a no-op when they already exist).
for output_dir in (metaspace_data_dir, plots_path):
    output_dir.mkdir(parents=True, exist_ok=True)

## Check and subset metadata

A metadata file downloaded from Metaspace has a two-line header, followed by the column names and then one row per dataset. I prefer to keep the header in the file just in case, and read the table with pandas starting from the third line.

In [4]:
# The first two lines of the file are skipped so that the third line
# supplies the column names.
metadata = pd.read_csv(metadata_path, skiprows=2)

# Fail early, with a readable message, if a column used by the later cells is
# absent: datasetId/datasetName identify each dataset, slide/well are used to
# build the per-dataset download directory.
required_columns = {"datasetId", "datasetName", "slide", "well"}
missing_columns = required_columns - set(metadata.columns)
if missing_columns:
    raise ValueError(
        f"Metadata table {metadata_path} is missing required columns: {sorted(missing_columns)}"
    )

metadata

Unnamed: 0,datasetId,datasetName,condition,well,slide,group,submitter,PI,organism,organismPart,...,growthConditions,ionisationSource,maldiMatrix,analyzer,resPower400,polarity,uploadDateTime,FDR@10%,database,opticalImage
0,2021-10-27_00h20m47s,2021-28-09_Gastrosome_Slide6Drugs_Well8_150x15...,Drugs,8,6,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-26T18:20:47.944000,132,SwissLipids - 2018-02-02,https://metaspace2020.eu/fs/raw_optical_images...
1,2021-10-27_00h05m07s,2021-28-09_Gastrosome_Slide5Feeding_Well3_150x...,Feeding,3,5,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-26T18:05:07.978000,107,SwissLipids - 2018-02-02,https://metaspace2020.eu/fs/raw_optical_images...
2,2021-10-27_23h59m41s,2021-28-09_Gastrosome_Slide1control_well7_100x...,Control,7,1,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T23:59:41.744511,94,CoreMetabolome - v3,No optical image
3,2021-10-27_23h59m25s,2021-28-09_Gastrosome_Slide1control_well8_100x...,Control,8,1,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T23:59:25.751249,113,SwissLipids - 2018-02-02,No optical image
4,2021-10-27_00h32m38s,2021-28-09_Gastrosome_Slide1control_well4_150x...,Control,4,1,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T00:32:39.557240,148,SwissLipids - 2018-02-02,No optical image
5,2021-10-27_00h20m58s,2021-28-09_Gastrosome_Slide6Drugs_Well4_150x15...,Drugs,4,6,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T00:20:59.427535,74,CoreMetabolome - v3,No optical image
6,2021-10-27_00h16m49s,2021-28-09_Gastrosome_Slide6Drugs_Well3_150x15...,Drugs,3,6,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T00:16:49.937781,34,CoreMetabolome - v3,No optical image
7,2021-10-27_00h09m40s,2021-28-09_Gastrosome_Slide5Feeding_Well8_150x...,Feeding,8,5,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T00:09:40.949112,93,SwissLipids - 2018-02-02,No optical image
8,2021-10-27_00h03m04s,2021-28-09_Gastrosome_Slide5Feeding_Well7_150x...,Feeding,7,5,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-27T00:03:04.917123,69,SwissLipids - 2018-02-02,No optical image
9,2021-10-26_23h23m07s,2021-28-09_Gastrosome_Slide1control_well3_100x...,Control,3,1,♡EMBL♡,Mohammed Shahraz,Theodore Alexandrov,Homo sapiens (human) | Mus musculus (mouse),Cells,...,,MALDI,DHB,Orbitrap,98995,positive,2021-10-26T23:23:07.395537,65,SwissLipids - 2018-02-02,No optical image


Doing analysis with all samples

In [5]:
# Keep all samples, but work on an independent copy: later cells add columns to
# `subset_metadata`, and a plain alias would silently modify `metadata` as well
# (making the table displayed above stale and the cells non-idempotent).
subset_metadata = metadata.copy()

## Set up Metaspace download parameters

In [6]:
# Maximum false discovery rate of the annotations to download.
# Valid values are 0.05, 0.1, 0.2 and 0.5 (i.e. 5%, 10%, 20% and 50%).
fdr = 0.5

# Molecule databases to download, each given as a [name, version] pair.
# Further databases can be appended, e.g. ["SwissLipids", "2018-02-02"].
databases = [["Gastrosome_pixel_analysis_intra_ions", "v1"]]

## Set paths of the datasets and store final metadata table

In [7]:
# One download folder per dataset: <metaspace_data_dir>/slide_<slide>/W<well>,
# filled in from the `slide` and `well` values of each metadata row.
download_dirs = [
    str(metaspace_data_dir / "slide_{slide}/W{well}".format_map(ds_info.to_dict()))
    for _, ds_info in subset_metadata.iterrows()
]
subset_metadata.loc[:, "metaspace_download_dir_path"] = download_dirs

# Persist the extended table (without the pandas index).
subset_metadata.to_csv(metadata_save_path, index=False)

## Download
- You need your METASPACE credentials set in your `~/.metaspacecredentials` file
(which should be already in place if you already used SpaceM).
- Next, create a SpaceM config file and select the molecules databases you want to download in the `Mass Spectrometry Annotation` step


In [11]:
# Build the SpaceM configuration directly in code, starting from an empty config.
# (Alternative, currently disabled: load a JSON template such as
#  "/Users/alberto-mac/EMBL_ATeam/projects/gastrosome_compare_conditions/spacem_config_template.json"
#  with read_project_config.)
spacem_config = ProjectConfig()

# Only the METASPACE annotation settings are customised here: which molecule
# databases to download and the maximum FDR.
metaspace_settings = spacem_config.mass_spec_annotation.metaspace
metaspace_settings.databases = databases
metaspace_settings.fdr = fdr



In [13]:

# Silence some known, harmless FutureWarnings emitted while saving adata:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, message=r".*Reordering categories will always return a new Categorical object.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=r".*is_categorical is deprecated and will be removed in a future version.*")
# warnings.filterwarnings("ignore", category=FutureWarning, message=r".*Transforming to str index.*")


# For every dataset in the metadata table: download its annotations from
# METASPACE, save the ion metadata as CSV and store one AnnData file in which
# every pixel is an observation and every annotated ion is a variable.
# The loop variable is deliberately NOT called `id`: at notebook top level that
# would shadow the builtin `id()` for all later cells.
for row_label, ds_meta in subset_metadata.iterrows():
    print(f"Downloading data for {ds_meta.datasetName}")
    dataset_id = ds_meta["datasetId"]
    metaspace_download_dir_path = ds_meta["metaspace_download_dir_path"]
    download_dir = Path(metaspace_download_dir_path)
    download_dir.mkdir(parents=True, exist_ok=True)

    # Manually set the dataset ID in the (shared) config object:
    spacem_config.mass_spec_annotation.metaspace.dataset_id = dataset_id

    # Download from METASPACE:
    annotation_loader = MetaSpaceAnnotationsLoader(output_dir=download_dir,
                                                   config=spacem_config)
    ion_images = annotation_loader.run().ion_images

    # Save metadata to disk:
    ion_metadata = ion_images.metadata
    ion_images_metadata_path = download_dir / "ion_images_metadata.csv"
    write_ion_metadata(ion_metadata, ion_images_metadata_path)

    # Pixel analysis: the mask is all True, so every pixel is kept.
    # NOTE(review): assumes ion_images.shape is the 2-D pixel grid and
    # ion_images.array has one leading axis per ion — TODO confirm.
    cell_mask = np.ones(ion_images.shape, dtype="bool")

    # Indices of the kept pixels along the first two mask axes.
    cell_indices = cell_mask.nonzero()

    # Flatten the pixel axes so that each ion becomes one row of intensities ...
    count_matrix = np.reshape(ion_images.array, (ion_images.array.shape[0], 1, -1))

    # ... and keep only the masked pixels -> shape (ions, kept pixels).
    count_matrix = count_matrix[:, cell_mask.reshape((1, -1)) > 0]

    # Per-pixel annotations: position ("x" is the index along mask axis 0,
    # "y" along axis 1) plus all dataset-level metadata repeated on every pixel.
    obs_df = pd.DataFrame(data={"x": cell_indices[0], "y": cell_indices[1]})
    for col in ds_meta.index:
        obs_df[col] = ds_meta[col]

    # # Add TIC to .obs
    # obs_df["tic"] = np.reshape(tic, (1, -1))[cell_mask.reshape((1, -1)) > 0]

    # Fix some types to avoid problems while saving adata:
    ion_metadata.databases = ion_metadata.databases.astype("str")
    ion_metadata.moleculeIds = ion_metadata.moleculeIds.astype("str")
    ion_metadata.moleculeNames = [str(name) for name in ion_metadata.moleculeNames]
    ion_metadata.fdr = ion_metadata.fdr.astype("str")

    # Transpose so that rows are pixels (obs) and columns are ions (var).
    adata = sc.AnnData(X=count_matrix.T,
                       obs=obs_df,
                       var=ion_metadata.set_index("annotation_id"))

    adata.write(download_dir / "adata.h5ad")



Downloading data for 2021-28-09_Gastrosome_Slide6Drugs_Well8_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 86.11it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide5Feeding_Well3_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 84.24it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide1control_well7_100x100_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 87.74it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide1control_well8_100x100_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:04<00:00, 95.82it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide1control_well4_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:06<00:00, 76.56it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide6Drugs_Well4_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 80.83it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide6Drugs_Well3_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:06<00:00, 74.06it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide5Feeding_Well8_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 85.23it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide5Feeding_Well7_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 90.09it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide1control_well3_100x100_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:05<00:00, 84.57it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


Downloading data for 2021-28-09_Gastrosome_Slide6Drugs_Well7_150x150_a29ss25_DHBpos


100%|████████████████████████████████████████| 470/470 [00:06<00:00, 72.30it/s] 
... storing 'datasetId' as categorical
... storing 'datasetName' as categorical
... storing 'condition' as categorical
... storing 'group' as categorical
... storing 'submitter' as categorical
... storing 'PI' as categorical
... storing 'organism' as categorical
... storing 'organismPart' as categorical
... storing 'condition.1' as categorical
... storing 'ionisationSource' as categorical
... storing 'maldiMatrix' as categorical
... storing 'analyzer' as categorical
... storing 'polarity' as categorical
... storing 'uploadDateTime' as categorical
... storing 'database' as categorical
... storing 'opticalImage' as categorical
... storing 'metaspace_download_dir_path' as categorical
... storing 'formula' as categorical
... storing 'adduct' as categorical
... storing 'fdr' as categorical
... storing 'databases' as categorical
... storing 'moleculeNames' as categorical
... storing 'moleculeIds' as categorical


In [None]:
# Leftover check: `ds_meta` still holds the last row yielded by the download
# loop, so this displays the name of the most recently processed dataset.
ds_meta.datasetName