# Empirical compounds via asari v0.8

Environment (started from the `asari` directory): `docker run -v /Users/shuzhao/li.projects:/home/jovyan -p 8888:8888 jupyter/scipy-notebook`

SL 2022-01-04

In [1]:
!pip install pyopenms mass2chem metDatamodel

Collecting pyopenms
  Using cached https://files.pythonhosted.org/packages/b8/75/19cea60fc9c6e6a8a7fa7b20153c12122695e8f11fa82410dcf23ab68484/pyopenms-2.4.0-cp37-cp37m-manylinux1_x86_64.whl
Collecting mass2chem
  Using cached https://files.pythonhosted.org/packages/78/67/4baafbf5ad004197dbb0749a4738c0d05af4f5e4b1bd6db23c4a2d5d880c/mass2chem-0.1.9-py3-none-any.whl
Collecting metDatamodel
  Downloading https://files.pythonhosted.org/packages/86/8c/97b074abff3a041e3c75cb8fbb2fed98b73567e3efed4e38f6aa83b8450a/metDataModel-0.4.9-py3-none-any.whl (2.1MB)
     |████████████████████████████████| 2.2MB 6.5MB/s eta 0:00:01
Installing collected packages: pyopenms, mass2chem, metDatamodel
Successfully installed mass2chem-0.1.9 metDatamodel-0.4.9 pyopenms-2.4.0


In [2]:
import sys
# Add the relative directory 'asari' to the module search path so the
# `asari.*` imports in the next cell can resolve. The path is relative, so it
# is resolved against the kernel's current working directory.
# NOTE(review): presumably this is a source checkout of asari — confirm.
sys.path.append('asari')

In [5]:
from asari.samples import Sample
from asari.constructors import epdsConstructor

In [6]:
# Load a single mzML acquisition; SS is reused by the cells that follow.
SS = Sample(input_file='T04/MG_20211022_011.mzML')
SS.get_masstraces()

# Peak-detection settings gathered in one place and passed as keyword arguments.
peak_detection_settings = dict(
    min_intensity_threshold=10000,
    min_fwhm=3,
    min_prominence_threshold=5000,
    snr=2,
)
SS.get_peaks(**peak_detection_settings)

print("Number of peaks: ", len(SS.list_peaks))

Processing T04/MG_20211022_011.mzML, found 14887 mass traces.
Number of peaks:  4707


In [7]:
# Inspect one mass trace by list index; 1670 is the parent_masstrace_id of
# the peaks examined in the following cells.
print(SS.list_mass_traces[1670])

{'id_number': 1670, 'mz': 133.09702315984987, 'rt_scan_numbers': [612, 613, 615, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 642, 643, 644, 645, 646, 647, 648, 649, 651, 652, 653, 654, 655], 'intensity': [7967, 10721, 14990, 10664, 9825, 12258, 16991, 19570, 15545, 16898, 24500, 18768, 22527, 24850, 13112, 11250, 13889, 17174, 16088, 11104, 8000, 13908, 15075, 15650, 14064, 11988, 16422, 12735, 8315, 8434, 12309, 12433, 11085, 6977, 9395, 14015, 12897, 12087, 14388, 9152]}


In [8]:
# Look at one detected peak; its parent_masstrace_id refers back to the
# mass trace (1670) printed in the previous cell.
SS.list_peaks[555]

{'parent_masstrace_id': 1670,
 'mz': 133.09702315984987,
 'apex': 654,
 'height': 14388.0,
 'left_base': 648,
 'right_base': 655,
 'id_number': 555}

In [9]:
# Import only the names this notebook uses from asari.search, so that the
# origin of each name is explicit (a star import hides where names come from).
from asari.search import (
    build_centurion_tree,
    find_all_matches_centurion_indexed_list,
    find_isotopic_signatures,
    isotopic_patterns,
)

In [10]:
# Build an m/z index over the peak list; it is queried by the search calls
# in the cells below.
# NOTE(review): the key used later (13309 for m/z 133.09) suggests bins of
# int(mz * 100) — confirm against asari.search.
mztree = build_centurion_tree(SS.list_peaks)

In [11]:
# Query the index for peaks matching m/z 83.0602; no tolerance is passed, so
# the function's own default matching window applies.
find_all_matches_centurion_indexed_list(83.0602, mztree)

[{'parent_masstrace_id': 2,
  'mz': 83.06027197744332,
  'apex': 451,
  'height': 259387.0,
  'left_base': 391,
  'right_base': 595,
  'id_number': 0}]

In [12]:
mztree[13309] # the index entry holding peaks in the neighborhood of m/z 133.09

[{'parent_masstrace_id': 1670,
  'mz': 133.09702315984987,
  'apex': 627,
  'height': 24850.0,
  'left_base': 612,
  'right_base': 648,
  'id_number': 554},
 {'parent_masstrace_id': 1670,
  'mz': 133.09702315984987,
  'apex': 654,
  'height': 14388.0,
  'left_base': 648,
  'right_base': 655,
  'id_number': 555}]

In [13]:
# Search the peaks for isotopic signatures. As the printed sample shows, each
# signature is a list of (number, relation) tuples that starts with an
# 'anchor' entry, followed by isotope relations such as '13C/12C'.
# NOTE(review): the numbers are presumably peak id_number values — confirm.
signatures = find_isotopic_signatures(SS.list_peaks, mztree, isotopic_patterns)
print(len(signatures), signatures[:5])

534 [[(1, 'anchor'), (3, '13C/12C')], [(143, 'anchor'), (157, '13C/12C')], [(181, 'anchor'), (184, '13C/12C')], [(182, 'anchor'), (191, '13C/12C'), (205, '18O/16O')], [(195, 'anchor'), (206, '13C/12C')]]


In [14]:
# Show the first five signatures that have more than two members.
[signature for signature in signatures if len(signature) > 2][:5]

[[(182, 'anchor'), (191, '13C/12C'), (205, '18O/16O')],
 [(295, 'anchor'), (335, '13C/12C'), (368, 'M(13C),M(34S)')],
 [(296, 'anchor'), (339, '13C/12C'), (332, '15N/14N'), (333, '15N/14N')],
 [(541, 'anchor'), (552, '13C/12C'), (551, '15N/14N'), (558, '18O/16O')],
 [(839, 'anchor'), (866, '18O/16O'), (879, 'M(13C),M(34S)')]]

## Initiating empCpds from the isotopic and adduct signatures

This is done via the class `epdsConstructor`.

In [15]:
# Build empirical compounds (empCpds) from the peak list. peaks_to_epds()
# prints the per-round counts shown below and returns the list of empCpds.
ECCON = epdsConstructor(SS.list_peaks)
list_empCpds = ECCON.peaks_to_epds()

Round 1 - numbers of epds and included peaks:  (534, 1180)
Round 2 - numbers of epds and included peaks:  (534, 2043)
Round 3 - numbers of epds:  982


In [16]:
# Inspect a small slice of the result; each empCpd is a dict with an 'id'
# and a 'list_peaks' of (number, relation) tuples.
list_empCpds[355:360]

[{'id': 355,
  'list_peaks': [(4211, 'anchor'), (4223, '13C/12C'), (4242, '18O/16O')]},
 {'id': 356,
  'list_peaks': [(4213, 'anchor'), (4243, '18O/16O'), (4339, 'anchor,+NH4')]},
 {'id': 357,
  'list_peaks': [(4214, 'anchor'),
   (4228, '13C/12C'),
   (4247, '18O/16O'),
   (4294, '18O/16O,Na/H, double charged')]},
 {'id': 358,
  'list_peaks': [(4215, 'anchor'), (4231, '13C/12C'), (4339, 'anchor,+NH4')]},
 {'id': 359,
  'list_peaks': [(4217, 'anchor'), (4232, '13C/12C'), (4339, 'anchor,+NH4')]}]

In [17]:
# List the attributes and methods available on the constructor instance.
dir(ECCON)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'extend_empCpds_by_adducts',
 'extend_isosignatures_by_adducts',
 'mode',
 'peak_dict',
 'peak_list',
 'peaks_to_epds']

## Summary

Starting from 4707 peaks, we got 534 empCpds by isotopic pairs, which increased to 982 after extending by common adducts.

The above is limited to isotopes and common adducts. More detailed annotation should use
`epdsConstructor.extend_empCpds_by_adducts` (not tested yet) to include more adducts & fragments.
