# Download Molecular Database

In [4]:
from metaspace.sm_annotation_utils import MolDBClient, get_config

In [5]:
# Build the configuration for the metaspace2020.eu host and create a
# molecular-database client from it. `client` is reused by later cells.
config = get_config('https://metaspace2020.eu')
client = MolDBClient(config)
# Last expression of the cell: the list of available databases is displayed.
client.getDatabaseList()

[MolDB(BraChemDB-2018-01 [2018-03-08]),
 MolDB(ChEBI [2016]),
 MolDB(ChEBI-2018-01 [2018-03-08]),
 MolDB(ECMDB-2018-12 [2018-12-13]),
 MolDB(EMBL-dev1 [2018-09-12]),
 MolDB(EMBL-dev2 [2000-01-01]),
 MolDB(HMDB-v2.5 [2018-03-08]),
 MolDB(HMDB-v2.5-cotton [2017-07-31]),
 MolDB(HMDB-v4 [2018-04-09]),
 MolDB(HMDB-v4-cotton [2018-04-27]),
 MolDB(HMDB-v4-endogenous [2018-04-09]),
 MolDB(LIPID_MAPS [2016]),
 MolDB(LipidMaps-2017-12-12 [2018-03-08]),
 MolDB(M4I_1-2019-06 [2019-06-15]),
 MolDB(PAMDB-v1.0 [2018-03-08]),
 MolDB(SwissLipids [2016]),
 MolDB(SwissLipids-2018-02-02 [2018-03-08])]

In [6]:
# pandas is required for the DataFrame below; no earlier cell shown here
# imports it, so import it in this cell to survive Restart & Run All.
import pandas as pd

# Select the database by one of the names listed by client.getDatabaseList().
moldb = client.getDatabase('HMDB-v4')
# Alternative database:
# moldb = client.getDatabase('HMDB-v4-endogenous')

# NOTE(review): limit=500000 is presumably chosen to exceed the database size
# so that every molecule is returned - confirm against the reported shape.
df = pd.DataFrame(moldb.molecules(limit=500000))
# Last expression of the cell: display (rows, columns) of the downloaded table.
df.shape

(113742, 3)

In [7]:
df.head()

Unnamed: 0,sf,mol_id,mol_name
0,C18H20O5,HMDB0140276,"1-(2-hydroxy-4,6-dimethoxyphenyl)-3-(4-methoxy..."
1,C17H16O5,HMDB0140278,"1-(2,6-dihydroxy-4-methoxyphenyl)-3-(4-methoxy..."
2,C17H16O5,HMDB0140279,"1-(2,4-dihydroxy-6-methoxyphenyl)-3-(4-methoxy..."
3,C17H16O5,HMDB0140280,"1-(2-hydroxy-4,6-dimethoxyphenyl)-3-(4-hydroxy..."
4,C18H18O6,HMDB0140281,"1-(3,6-dihydroxy-2,4-dimethoxyphenyl)-3-(4-met..."


# Compute Formula Masses (Neutral Molecules)

In [29]:
# The cpyMSpec library must be installed first; pin version 0.3.5 for consistency with Metaspace.
# %pip install cpyMSpec==0.3.5

In [23]:
import numpy as np
from cpyMSpec import InstrumentModel, isotopePattern

In [24]:
def calculate_masses(formula, charge=None, resolving_power=1e7):
    """Return the centroided isotope pattern of a formula.

    Args:
        formula: Sum formula; converted with ``str()`` before being handed
            to ``isotopePattern``.
        charge: Passed to ``addCharge`` when truthy. ``None`` (the default)
            and ``0`` both skip that call.
        resolving_power: Resolving power given to the ``'tof'``
            ``InstrumentModel`` used for centroiding.

    Returns:
        A tuple ``(masses, intensities)`` of NumPy arrays built from the
        centroids' ``masses`` and ``intensities`` attributes.
    """
    pattern = isotopePattern(str(formula))
    if charge:
        pattern.addCharge(charge)
    centroids = pattern.centroids(InstrumentModel('tof', resolving_power))
    masses = np.array(centroids.masses)
    intensities = np.array(centroids.intensities)
    return masses, intensities

In [31]:
# Sanity check on a single-element formula.
formula = 'Fe'
# charge=0 is falsy, so calculate_masses skips addCharge for this call.
mzs, ints = calculate_masses(formula, charge=0)
# Last expression of the cell: display the (masses, intensities) pair.
mzs, ints

(array([55.93494186, 53.93961463, 56.93539832, 57.93328005]),
 array([1.        , 0.06368354, 0.02310279, 0.00307373]))

In [32]:
# Same check for a molecular formula (the `sf` of the first row shown by df.head()).
# NOTE: `formula` is reused by the calculate_formula_weight call in a later cell.
formula = 'C18H20O5'
# charge=0 is falsy, so calculate_masses skips addCharge for this call.
mzs, ints = calculate_masses(formula, charge=0)
# Last expression of the cell: display the (masses, intensities) pair.
mzs, ints

(array([316.13107331, 317.13442831, 318.13778301, 318.13531933,
        317.1373509 , 319.13867443, 317.13529083, 319.14113821,
        318.14070481, 318.13864568, 320.14202915]),
 array([1.00000000e+00, 1.94766122e-01, 1.78990264e-02, 1.02807676e-02,
        2.30033082e-03, 2.00172960e-03, 1.90564346e-03, 1.03313870e-03,
        4.47687364e-04, 3.70988200e-04, 1.84028135e-04]))

In [45]:
def calculate_formula_weight(formula, charge=0):
    """Return the mass of the most intense peak of a formula's isotope pattern.

    Args:
        formula: Sum formula forwarded to ``calculate_masses``.
        charge: Charge forwarded to ``calculate_masses`` (default ``0``).

    Returns:
        The entry of the masses array at the index where the intensities
        array is largest, i.e. the most abundant isotope mass.
    """
    masses, intensities = calculate_masses(formula, charge)
    most_abundant = intensities.argmax()
    return masses[most_abundant]

In [46]:
# Append one hydrogen to the formula string and apply charge=1.
# NOTE(review): relies on `formula` still holding the value assigned in an
# earlier cell, and presumably models the [M+H]+ ion - confirm.
calculate_formula_weight(formula + 'H', charge=1)

317.13834976518814