In [1]:
# only run this cell if OPEs_ID is not installed
import sys
import os

sys.path.append(os.path.abspath(".."))

In [2]:
from pathlib import Path

import pandas as pd

from OPEs_ID.defines import ColumnNames as C 
from OPEs_ID.expr import ChemFormula 
from OPEs_ID.io import load_mzml

In [3]:
# load files
ms1_neg, ms2_neg = load_mzml(r"OPE-meth-neg.mzML")
ms1_pos, ms2_pos = load_mzml(r"OPE-meth-pos.mzML")
calc_data = {}  # save middle results

100%|██████████| 3159/3159 [00:02<00:00, 1411.50it/s]
100%|██████████| 5178/5178 [00:03<00:00, 1406.30it/s]


### OPE fragment search

In [4]:
from OPEs_ID.tools import TargetIon

# Define OPE fragments

target_ions = [
    TargetIon(ChemFormula("PO4H4+"), "P(=O)(-O)(-O)-O", "Alkyl"),
    TargetIon(ChemFormula("C6H8O4P+"), "P(=O)(-O)(-O)-O-c1ccccc1", "Aryl"),
    TargetIon(ChemFormula("C12H12O4P+"), "P(=O)(-O)(-O-c1ccccc1)-O-c1ccccc1", "Aryl"),
    TargetIon(
        ChemFormula("C7H10O4P+"),
        "[$(P(=O)(-O)(-O)-O-[$(c1c(-[CH3])cccc1),$(c1cc(-[CH3])ccc1),$(c1ccc(-[CH3])cc1)]),$(P(=O)(-O)(-O-[CH3])-O-c1ccccc1)]",
        "Aryl",
    ),
    TargetIon(
        ChemFormula("C14H16O4P+"),
        "P(=O)(-O)(-O-[$(c1c(-[CH3])cccc1),$(c1cc(-[CH3])ccc1),$(c1ccc(-[CH3])cc1)])-O-[$(c1c(-[CH3])cccc1),$(c1cc(-[CH3])ccc1),$(c1ccc(-[CH3])cc1)]",
        "Aryl",
    ),
    TargetIon(ChemFormula("CH6O4P+"), "P(=O)(-O)(-O)-O-[CH3]", "Alkyl"),
]

In [5]:
from OPEs_ID.ms2_tools import is_mass_in

MS2_FILTER_MASS_ACC = 20e-6  # The relative tolerance for OPE fragments searching
dic = {}
for ion in target_ions:
    dic[ion.refname] = is_mass_in(ms2_pos[C.SpecMZ], ion.mass, rtol=MS2_FILTER_MASS_ACC)
search_results = pd.DataFrame(dic, index=ms2_pos.index)
hit_sel = search_results.any(axis=1)
arly_ions_names = [ion.refname for ion in target_ions if ion.type == "Aryl"]
hit_results = search_results[hit_sel]
calc_data["OPEFragment"] = hit_results.assign(**{C.OPEClass: "Alkly"})
calc_data["OPEFragment"].loc[
    hit_results[arly_ions_names].any(axis=1), C.OPEClass
] = "Aryl"
calc_data["FragMS2"] = ms2_pos.loc[hit_sel]

In [6]:
calc_data["OPEFragment"]  # fragment search results

Unnamed: 0_level_0,H4O4P+,C6H8O4P+,C12H12O4P+,C7H10O4P+,C14H16O4P+,CH6O4P+,Class
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,False,False,False,False,False,True,Alkly
385,False,False,False,False,False,True,Alkly
460,False,False,False,False,False,True,Alkly
490,False,False,False,False,False,True,Alkly
511,False,False,False,False,False,True,Alkly
...,...,...,...,...,...,...,...
3955,False,False,False,False,False,True,Alkly
3987,False,False,False,False,False,True,Alkly
4034,False,False,False,False,False,True,Alkly
4084,True,False,False,False,False,False,Alkly


In [7]:
calc_data["FragMS2"]  # other infomation of the precursor with OPE fragments

Unnamed: 0_level_0,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
101,14.332499,114.091286,2.008852e+06,468423.468750,,"[51.940521240234375, 55.01850509643555, 55.054...","[1869.2684326171875, 8070.85400390625, 14583.5...",21
385,53.600676,113.059631,1.731070e+06,172608.968750,,"[51.940765380859375, 53.00286102294922, 53.039...","[1458.5789794921875, 1840.9471435546875, 3662....",81
460,64.153938,143.034851,4.645279e+06,528538.375000,,"[55.05503845214844, 56.845458984375, 56.942993...","[2469.441650390625, 1941.6021728515625, 7080.2...",98
490,67.998942,128.018463,1.171862e+07,369975.625000,,"[53.34624481201172, 53.85268020629883, 55.9349...","[10632.4169921875, 11350.876953125, 18887.0976...",104
511,70.962450,125.038673,1.185451e+06,193918.953125,,"[54.034568786621094, 55.01864242553711, 55.054...","[3067.632080078125, 5009.3466796875, 6878.4936...",109
...,...,...,...,...,...,...,...,...
3955,561.639948,114.054878,2.078415e+06,172524.109375,,"[51.94061279296875, 54.03459930419922, 55.0184...","[2659.22265625, 1437.8873291015625, 9126.29003...",896
3987,566.670696,113.096016,1.653141e+06,231633.718750,,"[51.940765380859375, 53.03928756713867, 55.018...","[1970.55712890625, 2573.38916015625, 7759.3041...",905
4034,573.203334,113.059647,1.773958e+06,186422.656250,,"[50.0598030090332, 50.73855209350586, 51.94059...","[1385.249755859375, 1498.3548583984375, 1859.9...",915
4084,580.790466,143.069855,1.777976e+06,167788.812500,,"[55.69074249267578, 56.942867279052734, 57.070...","[1640.77099609375, 7311.1923828125, 2232.68310...",928


### ROI group

In [8]:
from OPEs_ID.tools import ms2_ms1_roi

ROI_AGGREGATION_MASS_ACC = 20e-6
ms2_OPE_pos_roi_id, ms1_pos_roi_group = ms2_ms1_roi(
    calc_data["FragMS2"], ms1_pos, ROI_AGGREGATION_MASS_ACC
)
peak_idx = ms2_OPE_pos_roi_id.groupby(C.ROIGroupID)[C.PrecursorMS1Int].idxmax() # find out the max intensity precursors' indexes in each group 
ms2_pos_OPE_peak = calc_data["FragMS2"].loc[peak_idx].copy()
ms2_pos_OPE_peak[C.ROIGroupID] = ms2_OPE_pos_roi_id.loc[
    ms2_pos_OPE_peak.index, C.ROIGroupID
]
calc_data["FragMS2Peak"] = ms2_pos_OPE_peak
del ms2_pos_OPE_peak

100%|██████████| 64/64 [00:09<00:00,  6.94it/s]


In [9]:
calc_data["FragMS2Peak"] # the thorough infomation of the max intensity precursors in each group 

Unnamed: 0_level_0,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX,group_id
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
734,100.649982,114.091263,2147649.0,493569.625,,"[53.03919219970703, 54.063995361328125, 55.018...","[2154.56396484375, 1418.2442626953125, 8768.02...",154,0
2210,307.454118,113.059608,1798861.0,192685.578125,,"[53.03926467895508, 55.018531799316406, 55.054...","[2920.6435546875, 8555.4482421875, 15231.45703...",478,1
460,64.153938,143.034851,4645279.0,528538.375,,"[55.05503845214844, 56.845458984375, 56.942993...","[2469.441650390625, 1941.6021728515625, 7080.2...",98,2
490,67.998942,128.018463,11718620.0,369975.625,,"[53.34624481201172, 53.85268020629883, 55.9349...","[10632.4169921875, 11350.876953125, 18887.0976...",104,3
3385,478.77609,125.038704,1526145.0,295892.65625,1.0,"[53.03925704956055, 54.18890380859375, 55.0183...","[2339.498779296875, 1307.132568359375, 3081.58...",760,4
513,71.103948,127.03878,3606761.0,287749.15625,,"[50.43987274169922, 51.9406623840332, 53.03919...","[2382.763916015625, 10134.8466796875, 2629.993...",109,5
3506,496.580358,181.986526,2328268.0,486204.09375,,"[55.01850128173828, 55.0549430847168, 55.93484...","[1946.7650146484375, 4642.26171875, 2793.28247...",790,6
3647,517.012632,171.137405,1482800.0,195220.03125,,"[52.27368927001953, 55.0549430847168, 55.93494...","[1567.4835205078125, 5526.89990234375, 1992.06...",823,7
682,93.58248,143.069763,2334462.0,168625.6875,,"[55.05488586425781, 56.94288635253906, 57.0703...","[2014.331787109375, 7389.47265625, 1792.378540...",143,8
733,100.573104,182.022949,2445212.0,521405.53125,1.0,"[55.054847717285156, 55.93498611450195, 57.034...","[4546.1259765625, 3138.661865234375, 2805.5876...",154,9


### Predict Cl

In [11]:
from OPEs_ID.elements.table import Cl
from OPEs_ID.isotope_predict import predict_isotope

isotope_params = {Cl: range(1, 10)} # limit Cl between 1 and 10

calc_data["FragMS2Peak"]["Isotopes"] = predict_isotope(
    calc_data["FragMS2Peak"], ms1_pos, isotope_params, mass_acc=5e-6, top_n=5
)
calc_data["FragMS2Peak"] # see "Isotopes" column, in a form of (similarity_score, Cl_type)

100%|██████████| 24/24 [00:00<00:00, 230.77it/s]


Unnamed: 0_level_0,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX,group_id,Isotopes
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
734,100.649982,114.091263,2147649.0,493569.625,,"[53.03919219970703, 54.063995361328125, 55.018...","[2154.56396484375, 1418.2442626953125, 8768.02...",154,0,
2210,307.454118,113.059608,1798861.0,192685.578125,,"[53.03926467895508, 55.018531799316406, 55.054...","[2920.6435546875, 8555.4482421875, 15231.45703...",478,1,
460,64.153938,143.034851,4645279.0,528538.375,,"[55.05503845214844, 56.845458984375, 56.942993...","[2469.441650390625, 1941.6021728515625, 7080.2...",98,2,
490,67.998942,128.018463,11718620.0,369975.625,,"[53.34624481201172, 53.85268020629883, 55.9349...","[10632.4169921875, 11350.876953125, 18887.0976...",104,3,
3385,478.77609,125.038704,1526145.0,295892.65625,1.0,"[53.03925704956055, 54.18890380859375, 55.0183...","[2339.498779296875, 1307.132568359375, 3081.58...",760,4,
513,71.103948,127.03878,3606761.0,287749.15625,,"[50.43987274169922, 51.9406623840332, 53.03919...","[2382.763916015625, 10134.8466796875, 2629.993...",109,5,
3506,496.580358,181.986526,2328268.0,486204.09375,,"[55.01850128173828, 55.0549430847168, 55.93484...","[1946.7650146484375, 4642.26171875, 2793.28247...",790,6,
3647,517.012632,171.137405,1482800.0,195220.03125,,"[52.27368927001953, 55.0549430847168, 55.93494...","[1567.4835205078125, 5526.89990234375, 1992.06...",823,7,
682,93.58248,143.069763,2334462.0,168625.6875,,"[55.05488586425781, 56.94288635253906, 57.0703...","[2014.331787109375, 7389.47265625, 1792.378540...",143,8,
733,100.573104,182.022949,2445212.0,521405.53125,1.0,"[55.054847717285156, 55.93498611450195, 57.034...","[4546.1259765625, 3138.661865234375, 2805.5876...",154,9,


### Predict formulas

In [12]:
from OPEs_ID.formula import predict_formula
from OPEs_ID.elements.table import Cl35, Cl37
from OPEs_ID.utils import ProgressParallel
import joblib


@joblib.delayed
def get_formula(mz, Cl_isotope, is_Aryl, charge):
    isotope_expr = Cl_isotope
    if isotope_expr is None:
        n_Cl35 = 0
        n_Cl37 = 0
    else:
        n_Cl35 = isotope_expr[1][Cl35]
        n_Cl37 = isotope_expr[1][Cl37]
    if is_Aryl:
        min_DoU = 5
    else:
        min_DoU = 1
    formulas = predict_formula(
        mz,
        mass_acc=5e-6,
        charge=charge,
        lim_C=range(100),
        lim_H=range(200),
        lim_O=range(4, 40),
        lim_DoU=range(min_DoU, 51),
        lim_P=[1, 2],
        lim_Cl35=[n_Cl35],
        lim_Cl37=[n_Cl37],
    )
    return formulas


spec_info = calc_data["FragMS2Peak"]
arly_info = calc_data["OPEFragment"]

tasks = [
    get_formula(
        spec_info.at[idx, C.PrecursorMZ],
        spec_info.at[idx, C.Isotopes],
        arly_info.at[idx, C.OPEClass] == "Aryl",
        charge=1,
    )
    for idx in spec_info.index
]

formulas = ProgressParallel(n_jobs=-1)(tasks)

calc_data["FragMS2Peak"]["Formulas"] = formulas
del formulas, spec_info, arly_info
calc_data["FragMS2Peak"] # see "Formulas" column, `[]` means no valid formula predicted.

100%|██████████| 24/24 [00:02<00:00, 11.75it/s]


Unnamed: 0_level_0,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX,group_id,Isotopes,Formulas
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
734,100.649982,114.091263,2147649.0,493569.625,,"[53.03919219970703, 54.063995361328125, 55.018...","[2154.56396484375, 1418.2442626953125, 8768.02...",154,0,,[]
2210,307.454118,113.059608,1798861.0,192685.578125,,"[53.03926467895508, 55.018531799316406, 55.054...","[2920.6435546875, 8555.4482421875, 15231.45703...",478,1,,[]
460,64.153938,143.034851,4645279.0,528538.375,,"[55.05503845214844, 56.845458984375, 56.942993...","[2469.441650390625, 1941.6021728515625, 7080.2...",98,2,,[]
490,67.998942,128.018463,11718620.0,369975.625,,"[53.34624481201172, 53.85268020629883, 55.9349...","[10632.4169921875, 11350.876953125, 18887.0976...",104,3,,[]
3385,478.77609,125.038704,1526145.0,295892.65625,1.0,"[53.03925704956055, 54.18890380859375, 55.0183...","[2339.498779296875, 1307.132568359375, 3081.58...",760,4,,[]
513,71.103948,127.03878,3606761.0,287749.15625,,"[50.43987274169922, 51.9406623840332, 53.03919...","[2382.763916015625, 10134.8466796875, 2629.993...",109,5,,[]
3506,496.580358,181.986526,2328268.0,486204.09375,,"[55.01850128173828, 55.0549430847168, 55.93484...","[1946.7650146484375, 4642.26171875, 2793.28247...",790,6,,[]
3647,517.012632,171.137405,1482800.0,195220.03125,,"[52.27368927001953, 55.0549430847168, 55.93494...","[1567.4835205078125, 5526.89990234375, 1992.06...",823,7,,[]
682,93.58248,143.069763,2334462.0,168625.6875,,"[55.05488586425781, 56.94288635253906, 57.0703...","[2014.331787109375, 7389.47265625, 1792.378540...",143,8,,[]
733,100.573104,182.022949,2445212.0,521405.53125,1.0,"[55.054847717285156, 55.93498611450195, 57.034...","[4546.1259765625, 3138.661865234375, 2805.5876...",154,9,,[]


In [16]:
# List each valid formula as an entry
calc_data["FragMS2PeakWithFormula"] = calc_data["FragMS2Peak"].loc[
    calc_data["FragMS2Peak"]["Formulas"].apply(lambda x: len(x) != 0)
]
calc_data["FragMS2PeakWithFormula"]

Unnamed: 0_level_0,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX,group_id,Isotopes,Formulas
MS2IDX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
760,104.094114,127.015205,2063805.0,225840.625,,"[51.94062423706055, 52.66021728515625, 53.0392...","[10902.330078125, 1544.514404296875, 4500.6450...",159,10,,[[C12]2[H1]8[O16]4[P31]+]
1441,199.182984,141.030655,2255624.0,842423.625,1.0,"[53.03919219970703, 55.05458068847656, 55.9459...","[2441.86474609375, 1882.4317626953125, 1388.19...",308,16,,[[C12]3[H1]10[O16]4[P31]+]


In [17]:
from OPEs_ID.elements.table import H1


def chargedToNeutralFormula(f: ChemFormula):
    """Minus a H+ in formula `f`.
    Used for transforming the [M+H]+ formula to [M] formula.

    Parameters
    ----------
    f : ChemFormula

    Returns
    -------
    ChemFormula
    """
    e = f.copy()
    e[H1] -= e.charge
    e.charge = 0
    return e


def drop2H(f: ChemFormula):
    """Minus two H+ in formula `f`
    Used for transforming the [M+H]+ formula to [M-H]- formula.

    Parameters
    ----------
    f : ChemFormula

    Returns
    -------
    ChemFormula
    """
    e = f.copy()
    e.charge -= 2
    e[H1] -= 2
    return e

### Compare the intensity between negative and positive mode

In [18]:
from OPEs_ID.tools import search_from_another_ms1

tri_pos = []
for ms2_idx, rt, precursor_mz, ms1int, formulas in calc_data["FragMS2PeakWithFormula"][
    [C.RT, C.PrecursorMZ, C.PrecursorMS1Int, "Formulas"]
].itertuples():
    for f in formulas:
        mass = drop2H(f).mass
        tic_int = search_from_another_ms1(mass, rt, ms1_neg, 5e-6, rt_atol=60) 
        tri_pos.append((ms2_idx, f, (precursor_mz - f.mass) / f.mass, ms1int > tic_int))
tri_pos_df = pd.DataFrame(
    tri_pos, columns=[C.MS2IDX, "Formula", "Deviation", "Tri-ester"]
)

In [19]:
# Merge results to get the detailed infomation
output1_results = pd.merge(
    tri_pos_df,
    calc_data["FragMS2PeakWithFormula"].drop(columns=["Formulas", C.ROIGroupID]),
    left_on=C.MS2IDX,
    right_index=True,
).merge(calc_data["OPEFragment"], left_on=C.MS2IDX, right_index=True)

In [20]:
output1_results

Unnamed: 0,MS2IDX,Formula,Deviation,Tri-ester,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,SpecINT,MS1IDX,Isotopes,H4O4P+,C6H8O4P+,C12H12O4P+,C7H10O4P+,C14H16O4P+,CH6O4P+,Class
0,760,[C12]2[H1]8[O16]4[P31]+,-2e-06,True,104.094114,127.015205,2063805.0,225840.625,,"[51.94062423706055, 52.66021728515625, 53.0392...","[10902.330078125, 1544.514404296875, 4500.6450...",159,,False,False,False,False,False,True,Alkly
1,1441,[C12]3[H1]10[O16]4[P31]+,-3e-06,True,199.182984,141.030655,2255624.0,842423.625,1.0,"[53.03919219970703, 55.05458068847656, 55.9459...","[2441.86474609375, 1882.4317626953125, 1388.19...",308,,False,False,False,False,False,True,Alkly


### Metfrag

In [19]:
# Prepare metfrag configuration
# Refer to https://ipb-halle.github.io/MetFrag/projects/metfragcl

from OPEs_ID.metfrag import MetFragParameter

metfrag_params = []
for ms2_idx, precursor_mz, formula, mz_spec, int_spec, is_tri in output1_results[
    [C.MS2IDX, C.PrecursorMZ, "Formula", C.SpecMZ, C.SpecINT, "Tri-ester"]
].itertuples(index=False):
    neutral_formula = chargedToNeutralFormula(formula).monoisotopic_formula()
    p = MetFragParameter()
    p.NeutralPrecursorMolecularFormula = neutral_formula
    p.PeakListString = ";".join(f"{a}_{b}" for (a, b) in zip(mz_spec, int_spec))
    p.IonizedPrecursorMass = precursor_mz
    p.MaximumTreeDepth = 2
    p.MetFragDatabaseType = "LocalCSV"  # "PubChem"
    p.LocalDatabasePath = Path("db.csv").resolve() 
    smarts_list = []
    if is_tri:
        smarts_list.append("[#6]-O-P(=O)(-O-[#6])-O-[#6]")
    else:
        smarts_list.append("[HO]-P(=O)(-O)-O-[#6]")
    for ion in target_ions:
        if calc_data["OPEFragment"].at[ms2_idx, ion.refname]:
            smarts_list.append(ion.smarts)
    p.FilterSmartsInclusionList = smarts_list
    p.MetFragPreProcessingCandidateFilter = ["UnconnectedCompoundFilter"]
    if p.FilterSmartsInclusionList is not None:
        p.MetFragPreProcessingCandidateFilter.append(
            "SmartsSubstructureInclusionFilter"
        )
    p.MetFragScoreTypes = "FragmenterScore"
    p.MetFragScoreWeights = 1
    p.ResultsPath = Path("compute_dir").resolve() # Where to save the results files
    p.SampleName = rf"{ms2_idx}_{neutral_formula}_{precursor_mz}"
    p.m_ParamSavePath = (
        rf"param_dir/{ms2_idx}_{neutral_formula}_{precursor_mz}_param.txt"
    ) # Where to save the parameter files

    metfrag_params.append(p)

In [20]:
from OPEs_ID.metfrag import Config

Config.get_java_exe().exists()

# Config.set_java_exe(Path("path/to/java.exe"))
# Uncomment and run the above code to set java.exe path if necessary

True

In [30]:
from OPEs_ID.metfrag.run import AsyncMetFragPool

pool = AsyncMetFragPool(n_jobs=4) # n_jobs is the number of parallel worker
re = pool(metfrag_params)

100%|██████████| 2/2 [00:02<00:00,  1.21s/it]


In [31]:
from OPEs_ID.metfrag.run import MetFragFailed

metfrag_summary = [
    m.shape[0] if not isinstance(m, MetFragFailed) else "Error" for m in re
]
output1_results["Metfrag"] = metfrag_summary

In [None]:
output1_results

Unnamed: 0,MS2IDX,Formula,Deviation,Tri-ester,RT,Precursor,PrecursorInt,MS1INT,Charge,SpecMZ,...,MS1IDX,Isotopes,H4O4P+,C6H8O4P+,C12H12O4P+,C7H10O4P+,C14H16O4P+,CH6O4P+,Class,Metfrag
0,760,[C12]2[H1]8[O16]4[P31]+,-2e-06,True,104.094114,127.015205,2063805.0,225840.625,,"[51.94062423706055, 52.66021728515625, 53.0392...",...,159,,False,False,False,False,False,True,Alkly,0
1,1441,[C12]3[H1]10[O16]4[P31]+,-3e-06,True,199.182984,141.030655,2255624.0,842423.625,1.0,"[53.03919219970703, 55.05458068847656, 55.9459...",...,308,,False,False,False,False,False,True,Alkly,1


In [33]:
OUTPUT_NAME = "OPEs_list1.xlsx"
output1_results.drop(  # drop unnecessary columns
    columns=[C.MS1IDX, C.MS2IDX, C.SpecMZ, C.SpecINT]
).sort_values(  # sort values
    [C.OPEClass, C.RT]
).set_index(  # set multiindex for better format
    [
        C.OPEClass,
        C.RT,
        C.PrecursorMZ,
        C.PrecursorMS1Int,
        C.Isotopes,
        C.IsTriester,
    ]
).to_excel(
    OUTPUT_NAME
)

In [None]:
from OPEs_ID.metfrag.output import to_excel_with_mol_img
from OPEs_ID.metfrag.run import MetFragFailed
from collections import namedtuple

OUTPUT_DIR = Path("OPEs_PM_QC1_db")  # path to save detailed metfrag results
OUTPUT_DIR.mkdir(exist_ok=True)
NoResultEntry = namedtuple(
    "NoResultEntry",
    ["RT", "Procursor", "formula", "comments", "webcode", "stdout", "stderr"],
)
no_results = []
for idx, param, metfrag_result in zip(output1_results.index, metfrag_params, re):
    rt = output1_results.at[idx, C.RT]
    precursor_mz = output1_results.at[idx, C.PrecursorMZ]
    formula = output1_results.at[idx, C.Formula]

    if isinstance(metfrag_result, MetFragFailed):
        no_results.append(
            NoResultEntry(
                rt,
                precursor_mz,
                formula,
                None,
                metfrag_result.returncode,
                metfrag_result.stdout,
                metfrag_result.stderr,
            )
        )
    else:
        if metfrag_result.size == 0:
            no_results.append(
                NoResultEntry(
                    rt,
                    precursor_mz,
                    param.NeutralPrecursorMolecularFormula,
                    "No Valid",
                    None,
                    None,
                    None,
                )
            )
        else:
            to_excel_with_mol_img(
                metfrag_result,
                fname=OUTPUT_DIR / f"{precursor_mz:.4f}_{rt:.4f}_{formula}.xlsx",
                metfrag_param=param,
            )

if len(no_results) != 0:
    pd.DataFrame(no_results).to_excel(OUTPUT_DIR / "no_results.xlsx")