# Concatenate low mass range and high mass range datasets

In [9]:
from pathlib import Path
from concatenate_imzml_files import concat_imzml_files
from definitions import ROOT_DIR
import numpy as np

Provide inputs
- Folder where imzmls for all matrices are located

In [10]:
# Root folder that holds the imzML files for all matrices (one sub-folder per matrix).
p = Path(ROOT_DIR).joinpath("1_stitch_and_upload_datasets")

- Iterate over files to find pairs that will be stitched

In [22]:
def _first_match(folder, pattern, exclude_name):
    """Return the first file below ``folder`` whose name matches ``pattern``.

    Files named ``exclude_name`` are ignored. This matters because the stitched
    output file (``*_mz70-1510.imzML``) also matches the low-mass glob
    (``*mz70*``), so without the exclusion a re-run could pick a previously
    stitched output as one of its own inputs.

    Matches are sorted so the choice is deterministic across runs.

    Raises:
        FileNotFoundError: if no matching file exists below ``folder``.
    """
    candidates = sorted(f for f in folder.rglob(pattern) if f.name != exclude_name)
    if not candidates:
        raise FileNotFoundError(f"No file matching '{pattern}' found under {folder}")
    return candidates[0]


# Each entry is [[low_mass_imzml, high_mass_imzml], stitched_output_path].
all_args = []

for mpath in sorted(p.iterdir()):
    if not mpath.is_dir():
        continue

    # Same order as before: positive pair first, then negative pair.
    for pol in ('pos', 'neg'):
        fname = mpath / f"{mpath.name}_{pol}_mz70-1510.imzML"
        lo = _first_match(mpath, f'*{pol}*mz70*.imzML', fname.name)
        hi = _first_match(mpath, f'*{pol}*mz300*.imzML', fname.name)
        all_args.append([[lo, hi], fname])

# np.save(p / 'stitching_inputs.npy', list(all_args))

- Concatenate each pair of files

In [26]:
for source_files, stitched_file in all_args:
    # One (x, y) tuple per imzML file; edit these to offset either file's coordinates.
    offsets = [
        (0, 0),
        (0, 0),
    ]
    # One (min_mz, max_mz) tuple per imzML file. Replace 'auto' with a non-string
    # number to explicitly specify the m/z range for that file.
    mz_ranges = [
        ('auto', 'auto'),
        ('auto', 'auto'),
    ]
    concat_imzml_files(source_files, offsets, mz_ranges, stitched_file)

Parsing imzML file for d:\saharuka\spotting\20_matrices\1_stitch_and_upload_datasets\pNA\21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz70-350.imzML
Parsing imzML file for d:\saharuka\spotting\20_matrices\1_stitch_and_upload_datasets\pNA\21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz300-1510.imzML
21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz70-350.imzML detected m/z range: 70.0-350.0. Taking range -325.5
21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz300-1510.imzML detected m/z range: 300.0-1510.0. Taking range 325.5-
Reading spectra for d:\saharuka\spotting\20_matrices\1_stitch_and_upload_datasets\pNA\21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz70-350.imzML
Reading spectra for d:\saharuka\spotting\20_matrices\1_stitch_and_upload_datasets\pNA\21052021_ME_VS_slide9F_126x280_px150_att29_pNA_pos_rf50_mz300-1510.imzML
Writing to d:\saharuka\spotting\20_matrices\1_stitch_and_upload_datasets\pNA\pNA_pos_mz70-1510.imzML
Parsing imz

# Upload stitched datasets to METASPACE

In [1]:
from pathlib import Path
import json, getpass
from metaspace import SMInstance
import pandas as pd
from definitions import ROOT_DIR

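- To avoid processing with HMDB, upload to the staging server. Note that the cell below currently connects to `https://metaspace2020.eu`; change `host` if a staging server is intended.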

In [2]:
# NOTE(review): the accompanying text says to upload on staging, but this host string
# does not mention staging - confirm which server is intended before submitting.
METASPACE_HOST = 'https://metaspace2020.eu'
sm = SMInstance(host=METASPACE_HOST)

if not sm.logged_in():
    # Prompt through getpass so the API key is never accidentally saved with this notebook.
    api_key = getpass.getpass(prompt='API key: ', stream=None)
    sm.login(api_key=api_key)

API key: ········


Provide inputs for each matrix (one key/value pair per matrix):

full matrix name, additional neutral gains specific to this matrix (the matrix molecule), and solvent

In [3]:
# Per-matrix upload settings, keyed by the matrix folder name.
# Each value is: [matrix full name, extra neutral gains for this matrix, solvent]
matrix_fullname = {
    'DHB': [
        '2,5-dihydroxybenzoic acid',
        ['+C7H6O4'],
        'ACN (70% v/v, aq.)',
    ],
    'DAN': [
        '1,5-diaminonaphthalene',
        ['+C10H10N2', '+C10H8N2'],
        'ACN (70% v/v, aq.)',
    ],
    'norharmane': [
        'norharmane',
        ['+C11H8N2'],
        'CHCl3:MeOH (1:1 v/v)',
    ],
    '9AA': [
        '9-aminoacridine',
        ['+C13H10N2'],
        'MeOH (70% v/v, aq.)',
    ],
    'CHCA': [
        'alpha-cyano-4-hydroxycinnamic acid',
        ['+C10H7NO3'],
        'ACN (50% v/v, aq.)',
    ],
    'ClCCA': [
        '4-chloro-alpha-cyanocinnamic acid',
        ['+C10H6ClNO2'],
        'ACN (50% v/v, aq.)',
    ],
    'NEDC': [
        'N-(1-naphthyl)ethylenediamine dihydrochloride',
        ['+C12H14N2', '+HCl'],
        'ACN (70% v/v, aq.)',
    ],
    'PNDIT2': [
        'PNDI-T2',
        [],
        'Toluene',
    ],
    'MAPS': [
        'Maleic anhydride proton sponge',
        ['+C18H18N2O3'],
        'Toluene',
    ],
    'DHAP': [
        '2,5-dihydroxyacetophenone',
        ['+C8H8O3'],
        'ACN (70% v/v, aq.)',
    ],
    'pNA': [
        '4-Nitroaniline',
        ['+C6H6N2O2'],
        'MeOH (85% v/v, aq.)',
    ],
}

- Define neutral losses in the function below

This reference may be useful for interpreting neutral losses: http://www.colby.edu/chemistry/PChem/StableLoss.html

In [4]:
def submit_dataset(dataset_name, matrix, solvent, polarity, adducts,
                   extra_neutral_losses=None, databases=None, is_public=False,
                   imzml_path=None, ibd_path=None):
    """Submit one imzML/ibd dataset through the module-level ``sm`` client.

    Args:
        dataset_name: Name given to the dataset on submission.
        matrix: Value stored in the ``MALDI_Matrix`` metadata field.
        solvent: Value stored in the ``Solvent`` metadata field.
        polarity: Value stored in the ``Polarity`` metadata field.
        adducts: Adduct list forwarded to ``sm.submit_dataset``.
        extra_neutral_losses: Additional entries appended to the fixed
            neutral-loss list below. ``None`` (the default) adds nothing.
        databases: Databases forwarded to ``sm.submit_dataset``. ``None`` (the
            default) selects ``[('Spotting_project_compounds-v9', 'feb2021')]``.
        is_public: Forwarded to ``sm.submit_dataset``.
        imzml_path: Path of the .imzML file. When ``None``, the notebook-level
            global ``imzml_fn`` is used, so existing calls keep working.
        ibd_path: Path of the .ibd file. When ``None``, the notebook-level
            global ``ibd_fn`` is used.

    Returns:
        Whatever ``sm.submit_dataset`` returns (stored by the caller as the dataset id).

    Raises:
        NameError: if a path is not passed and the matching global is undefined.
    """
    # None sentinels instead of mutable defaults, so no list is shared between calls.
    if databases is None:
        databases = [('Spotting_project_compounds-v9', 'feb2021')]
    extra = list(extra_neutral_losses) if extra_neutral_losses is not None else []

    # Prefer explicit paths; fall back to the globals the original code relied on.
    if imzml_path is None:
        imzml_path = imzml_fn
    if ibd_path is None:
        ibd_path = ibd_fn

    metadata = {
        'Data_Type': 'Imaging MS',  # shouldn't be changed
        'Sample_Information': {
            'Organism': 'None',
            'Organism_Part': 'None',
            'Condition': 'None',
            'Sample_Growth_Conditions': 'None'  # this is an extra field
        },
        'Sample_Preparation': {
            'Sample_Stabilisation': 'None',
            'Tissue_Modification': 'None',
            'MALDI_Matrix': matrix,
            'MALDI_Matrix_Application': 'TM-Sprayer',
            'Solvent': solvent,
        },
        'MS_Analysis': {
            'Polarity': polarity,
            'Ionisation_Source': 'AP-SMALDI5',
            'Analyzer': 'Orbitrap',
            'Detector_Resolving_Power': {
                'mz': 200,
                'Resolving_Power': 140000
            },
            'Pixel_Size': {
                'Xaxis': 150,
                'Yaxis': 150
            }
        }
    }

    # Neutral losses applied to every dataset; matrix-specific ones are appended.
    base_neutral_losses = [
        '-H2O', '-H2', '+H2',  # redox
        '-CO2', '-CH2O3', '-CH2O2',  # CO2+H2O, formic acid?
        '-HPO3', '-H3PO4',  # phosphate
        '-NH3',  # '-C2H5NO2',  # glycine
    ]

    ds_id = sm.submit_dataset(
        imzml_path, ibd_path, dataset_name,
        json.dumps(metadata), is_public, databases,
        project_ids=['59c259d8-7036-11eb-96db-9bed61e6254b'],
        adducts=adducts,
        neutral_losses=base_neutral_losses + extra,
    )

    return ds_id

- Upload datasets

In [31]:
p = Path(ROOT_DIR).joinpath("1_stitch_and_upload_datasets")

# Parallel per-dataset columns for the summary table built after the loop.
matrices, modes, ds_names, ids, losses = [], [], [], [], []

# Polarity label and adduct list to submit for each polarity tag in the file names.
ion_modes = {
    'pos': ('Positive', ['[M]+', '+H', '+Na', '+K']),
    'neg': ('Negative', ['[M]-', '-H', '+Cl']),
}

for mpath in p.iterdir():
    # Only the pNA folder is uploaded here; test `mpath.is_dir()` instead to upload every matrix.
    if mpath != p / 'pNA':
        continue

    for pol in ['pos', 'neg']:
        # Keep these two names: submit_dataset() reads imzml_fn and ibd_fn as globals.
        imzml_fn = list(mpath.rglob(f"*{pol}*mz70-1510*.imzML"))[0]
        ibd_fn = list(mpath.rglob(f"*{pol}*mz70-1510*.ibd"))[0]
        dataset_name = imzml_fn.name

        # Entry layout: [matrix full name, extra neutral losses, solvent].
        matrix, extra_neutral_losses, solvent = matrix_fullname[mpath.name]

        polarity, mode_adducts = ion_modes[pol]
        adducts = list(mode_adducts)  # fresh list for every submission

        ds_id = submit_dataset(dataset_name, matrix, solvent, polarity, adducts, extra_neutral_losses)

        # Record one row of the summary table.
        matrices.append(matrix)
        modes.append(polarity)
        ds_names.append(dataset_name)
        ids.append(ds_id)
        losses.append(extra_neutral_losses)

df = pd.DataFrame({
    'matrix': matrices,
    'polarity': modes,
    'ds_name': ds_names,
    'ds_id': ids,
    'extra_neutral_losses': losses,
})

Uploading   1 part of pNA_pos_mz70-1510.imzML file...
Uploading   2 part of pNA_pos_mz70-1510.imzML file...
Uploading   3 part of pNA_pos_mz70-1510.imzML file...
Uploading   4 part of pNA_pos_mz70-1510.imzML file...
Uploading   5 part of pNA_pos_mz70-1510.imzML file...
Uploading   6 part of pNA_pos_mz70-1510.imzML file...
Uploading   7 part of pNA_pos_mz70-1510.imzML file...
Uploading   8 part of pNA_pos_mz70-1510.imzML file...
Uploading   9 part of pNA_pos_mz70-1510.imzML file...
Uploading  10 part of pNA_pos_mz70-1510.imzML file...
Uploading  11 part of pNA_pos_mz70-1510.imzML file...
Uploading  12 part of pNA_pos_mz70-1510.imzML file...
Uploading  13 part of pNA_pos_mz70-1510.imzML file...
Uploading  14 part of pNA_pos_mz70-1510.imzML file...
Uploading  15 part of pNA_pos_mz70-1510.imzML file...
Uploading  16 part of pNA_pos_mz70-1510.imzML file...
Uploading  17 part of pNA_pos_mz70-1510.imzML file...
Uploading   1 part of pNA_pos_mz70-1510.ibd file...
Uploading   2 part of pNA_pos_

Uploading 141 part of pNA_pos_mz70-1510.ibd file...
Uploading 142 part of pNA_pos_mz70-1510.ibd file...
Uploading 143 part of pNA_pos_mz70-1510.ibd file...
Uploading 144 part of pNA_pos_mz70-1510.ibd file...
Uploading 145 part of pNA_pos_mz70-1510.ibd file...
Uploading 146 part of pNA_pos_mz70-1510.ibd file...
Uploading 147 part of pNA_pos_mz70-1510.ibd file...
Uploading 148 part of pNA_pos_mz70-1510.ibd file...
Uploading 149 part of pNA_pos_mz70-1510.ibd file...
Uploading 150 part of pNA_pos_mz70-1510.ibd file...
Uploading 151 part of pNA_pos_mz70-1510.ibd file...
Uploading 152 part of pNA_pos_mz70-1510.ibd file...
Uploading 153 part of pNA_pos_mz70-1510.ibd file...
Uploading 154 part of pNA_pos_mz70-1510.ibd file...
Uploading 155 part of pNA_pos_mz70-1510.ibd file...
Uploading 156 part of pNA_pos_mz70-1510.ibd file...
Uploading 157 part of pNA_pos_mz70-1510.ibd file...
Uploading 158 part of pNA_pos_mz70-1510.ibd file...
Uploading 159 part of pNA_pos_mz70-1510.ibd file...
Uploading 16

Uploading 299 part of pNA_pos_mz70-1510.ibd file...
Uploading 300 part of pNA_pos_mz70-1510.ibd file...
Uploading 301 part of pNA_pos_mz70-1510.ibd file...
Uploading 302 part of pNA_pos_mz70-1510.ibd file...
Uploading 303 part of pNA_pos_mz70-1510.ibd file...
Uploading 304 part of pNA_pos_mz70-1510.ibd file...
Uploading 305 part of pNA_pos_mz70-1510.ibd file...
Uploading 306 part of pNA_pos_mz70-1510.ibd file...
Uploading 307 part of pNA_pos_mz70-1510.ibd file...
Uploading 308 part of pNA_pos_mz70-1510.ibd file...
Uploading 309 part of pNA_pos_mz70-1510.ibd file...
Uploading 310 part of pNA_pos_mz70-1510.ibd file...
Uploading 311 part of pNA_pos_mz70-1510.ibd file...
Uploading 312 part of pNA_pos_mz70-1510.ibd file...
Uploading 313 part of pNA_pos_mz70-1510.ibd file...
Uploading 314 part of pNA_pos_mz70-1510.ibd file...
Uploading 315 part of pNA_pos_mz70-1510.ibd file...
Uploading 316 part of pNA_pos_mz70-1510.ibd file...
Uploading 317 part of pNA_pos_mz70-1510.ibd file...
Uploading 31

Uploading  62 part of pNA_neg_mz70-1510.ibd file...
Uploading  63 part of pNA_neg_mz70-1510.ibd file...
Uploading  64 part of pNA_neg_mz70-1510.ibd file...
Uploading  65 part of pNA_neg_mz70-1510.ibd file...
Uploading  66 part of pNA_neg_mz70-1510.ibd file...
Uploading  67 part of pNA_neg_mz70-1510.ibd file...
Uploading  68 part of pNA_neg_mz70-1510.ibd file...
Uploading  69 part of pNA_neg_mz70-1510.ibd file...
Uploading  70 part of pNA_neg_mz70-1510.ibd file...
Uploading  71 part of pNA_neg_mz70-1510.ibd file...
Uploading  72 part of pNA_neg_mz70-1510.ibd file...
Uploading  73 part of pNA_neg_mz70-1510.ibd file...
Uploading  74 part of pNA_neg_mz70-1510.ibd file...
Uploading  75 part of pNA_neg_mz70-1510.ibd file...
Uploading  76 part of pNA_neg_mz70-1510.ibd file...
Uploading  77 part of pNA_neg_mz70-1510.ibd file...
Uploading  78 part of pNA_neg_mz70-1510.ibd file...
Uploading  79 part of pNA_neg_mz70-1510.ibd file...
Uploading  80 part of pNA_neg_mz70-1510.ibd file...
Uploading  8

Uploading 220 part of pNA_neg_mz70-1510.ibd file...
Uploading 221 part of pNA_neg_mz70-1510.ibd file...
Uploading 222 part of pNA_neg_mz70-1510.ibd file...
Uploading 223 part of pNA_neg_mz70-1510.ibd file...
Uploading 224 part of pNA_neg_mz70-1510.ibd file...
Uploading 225 part of pNA_neg_mz70-1510.ibd file...
Uploading 226 part of pNA_neg_mz70-1510.ibd file...
Uploading 227 part of pNA_neg_mz70-1510.ibd file...
Uploading 228 part of pNA_neg_mz70-1510.ibd file...
Uploading 229 part of pNA_neg_mz70-1510.ibd file...
Uploading 230 part of pNA_neg_mz70-1510.ibd file...
Uploading 231 part of pNA_neg_mz70-1510.ibd file...
Uploading 232 part of pNA_neg_mz70-1510.ibd file...
Uploading 233 part of pNA_neg_mz70-1510.ibd file...
Uploading 234 part of pNA_neg_mz70-1510.ibd file...
Uploading 235 part of pNA_neg_mz70-1510.ibd file...
Uploading 236 part of pNA_neg_mz70-1510.ibd file...
Uploading 237 part of pNA_neg_mz70-1510.ibd file...
Uploading 238 part of pNA_neg_mz70-1510.ibd file...
Uploading 23

Uploading 378 part of pNA_neg_mz70-1510.ibd file...
Uploading 379 part of pNA_neg_mz70-1510.ibd file...
Uploading 380 part of pNA_neg_mz70-1510.ibd file...
Uploading 381 part of pNA_neg_mz70-1510.ibd file...
Uploading 382 part of pNA_neg_mz70-1510.ibd file...
Uploading 383 part of pNA_neg_mz70-1510.ibd file...
Uploading 384 part of pNA_neg_mz70-1510.ibd file...
Uploading 385 part of pNA_neg_mz70-1510.ibd file...
Uploading 386 part of pNA_neg_mz70-1510.ibd file...
Uploading 387 part of pNA_neg_mz70-1510.ibd file...
Uploading 388 part of pNA_neg_mz70-1510.ibd file...
Uploading 389 part of pNA_neg_mz70-1510.ibd file...
Uploading 390 part of pNA_neg_mz70-1510.ibd file...
Uploading 391 part of pNA_neg_mz70-1510.ibd file...
Uploading 392 part of pNA_neg_mz70-1510.ibd file...
Uploading 393 part of pNA_neg_mz70-1510.ibd file...
Uploading 394 part of pNA_neg_mz70-1510.ibd file...
Uploading 395 part of pNA_neg_mz70-1510.ibd file...
Uploading 396 part of pNA_neg_mz70-1510.ibd file...
Uploading 39

Uploading 536 part of pNA_neg_mz70-1510.ibd file...
Uploading 537 part of pNA_neg_mz70-1510.ibd file...
Uploading 538 part of pNA_neg_mz70-1510.ibd file...
Uploading 539 part of pNA_neg_mz70-1510.ibd file...
Uploading 540 part of pNA_neg_mz70-1510.ibd file...
Uploading 541 part of pNA_neg_mz70-1510.ibd file...
Uploading 542 part of pNA_neg_mz70-1510.ibd file...
Uploading 543 part of pNA_neg_mz70-1510.ibd file...
Uploading 544 part of pNA_neg_mz70-1510.ibd file...
Uploading 545 part of pNA_neg_mz70-1510.ibd file...
Uploading 546 part of pNA_neg_mz70-1510.ibd file...
Uploading 547 part of pNA_neg_mz70-1510.ibd file...
Uploading 548 part of pNA_neg_mz70-1510.ibd file...
Uploading 549 part of pNA_neg_mz70-1510.ibd file...
Uploading 550 part of pNA_neg_mz70-1510.ibd file...
Uploading 551 part of pNA_neg_mz70-1510.ibd file...
Uploading 552 part of pNA_neg_mz70-1510.ibd file...
Uploading 553 part of pNA_neg_mz70-1510.ibd file...
Uploading 554 part of pNA_neg_mz70-1510.ibd file...
Uploading 55

- Save information about the uploaded datasets, including their dataset IDs on staging

In [32]:
# Write the upload summary table (one row per submitted dataset) into the datasets folder.
df.to_csv(p.joinpath('uploaded_datasets.csv'))