In [6]:
import pandas as pd

from msac import process
import pkg_resources

# Process a full file at once within code.
## Make a note of the name of your exact-mass column; it is passed to `process_file` as `mass_col`.

In [7]:
# Locate the example compound/mass table provided inside the msac package.
example = pkg_resources.resource_filename(
    'msac', 'example_data/example_compounds_masses.csv'
)

# To run on your own data instead, uncomment the next line and edit the path.
#example = 'my/path/mass_file.csv'

# Name of the column holding each compound's exact mass;
# replace it with the column name used in your own file.
mass_col_name = 'ExactMass'

# Read the table and preview its first rows.
input_masses = pd.read_csv(example)
input_masses.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3


In [8]:
# Compute adduct masses for every compound in the input table.
# All optional settings are explicitly left as None here: no custom adduct
# file, no outname, no coverage cutoff and no formula-based restriction.
adducts = process.process_file(
    input_masses,
    mass_col=mass_col_name,
    adduct_file=None,
    outname=None,
    coverage_cutoff=None,
    restrict=None,
)
adducts.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES,adduct,adduct mass
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M+ClO3,1037.384421
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...,2M+ClO3,1373.583968
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC,2M+ClO3,601.123061
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3,2M+ClO3,765.279562
4,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M+ClO4,1053.379335


In [15]:
# Show the input table again so it can be compared with the first preview
# (presumably to confirm process_file left the input unchanged -- TODO confirm).
input_masses.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3


## To use your own list of adducts.

In [16]:
# Path to the full example adduct list provided with msac.
my_adduct_file = pkg_resources.resource_filename(
    'msac', 'example_data/adduct_list_full.csv'
)
# To use an adduct list relative to your working folder instead,
# uncomment the next line and edit the path.
#my_adduct_file = 'my/path/my_file.csv'

# Same call as before, but the adducts now come from the supplied file.
adducts = process.process_file(
    input_masses,
    mass_col=mass_col_name,
    adduct_file=my_adduct_file,
    outname=None,
    coverage_cutoff=None,
    restrict=None,
)
adducts.head()

Using supplied adduct file c:\users\blum443\onedrive - pnnl\documents\projects\msac_public\msac\msac\example_data\adduct_list_full.csv. Coverage cutoff not used.


Unnamed: 0,Name,ExactMass,Formula,SMILES,adduct,adduct mass
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M+ClO3,1037.384421
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...,2M+ClO3,1373.583968
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC,2M+ClO3,601.123061
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3,2M+ClO3,765.279562
4,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M+ClO4,1053.379335


## To limit by the frequency of the adduct's presence in MassBank/GNPS/NIST
### Note: adduct_file must be None for this feature.

In [17]:
# Limit the adducts by percent coverage.
# coverage_cutoff accepts any value from 0.0 to 1.0 for this mode;
# adduct_file stays None so the cutoff can be applied.
adducts = process.process_file(
    input_masses,
    mass_col=mass_col_name,
    adduct_file=None,
    outname=None,
    coverage_cutoff=0.75,
    restrict=None,
)
adducts.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES,adduct,adduct mass
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,M+H,478.222414
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...,M+H,646.322188
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC,M+H,260.091734
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3,M+H,342.169985
4,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,M-H,476.207861


In [18]:
# Limit by the number of adducts to consider, counted from the most common:
# here coverage_cutoff is given as a count (20) rather than a fraction.
adducts = process.process_file(
    input_masses,
    mass_col=mass_col_name,
    adduct_file=None,
    outname=None,
    coverage_cutoff=20,
    restrict=None,
)
adducts.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES,adduct,adduct mass
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M+H,955.437552
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...,2M+H,1291.637099
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC,2M+H,519.176192
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3,2M+H,683.332693
4,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,2M-H,953.422999


## To restrict losses to only atoms in the compound
### Requires a column in your mass file specifying each compound's molecular formula; pass that column's name as `restrict`.

In [19]:
# Restrict losses to atoms present in each compound's molecular formula.
# formula_col names the column of the mass table that holds the formula;
# it is handed to process_file through the `restrict` argument.
formula_col = 'Formula'
adducts = process.process_file(
    input_masses,
    mass_col=mass_col_name,
    adduct_file=None,
    outname=None,
    coverage_cutoff=None,
    restrict=formula_col,
)
adducts.head()

Unnamed: 0,Name,ExactMass,Formula,SMILES,parent_atoms,adduct,adduct mass
0,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,"{'C': 28.0, 'H': 31.0, 'N': 1.0, 'O': 6.0}",2M+ClO3,1037.384421
1,Aconitine,645.314911,C34H47NO11,CCN1C[C@@]2(COC)C3[C@@H](OC)[C@H]4C1[C@@]3(C1C...,"{'C': 34.0, 'H': 47.0, 'N': 1.0, 'O': 11.0}",2M+ClO3,1373.583968
2,Skimmianine,259.084458,C14H13NO4,COc1ccc2c(OC)c3ccoc3nc2c1OC,"{'C': 14.0, 'H': 13.0, 'N': 1.0, 'O': 4.0}",2M+ClO3,601.123061
3,Artabotrine,341.162708,C20H23NO4,COc1ccc2c(c1O)-c1c(OC)c(OC)cc3c1[C@H](C2)N(C)CC3,"{'C': 20.0, 'H': 23.0, 'N': 1.0, 'O': 4.0}",2M+ClO3,765.279562
4,Speciozine,477.215138,C28H31NO6,COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](N(...,"{'C': 28.0, 'H': 31.0, 'N': 1.0, 'O': 6.0}",2M+ClO4,1053.379335


# An example of reformatting the output

In [29]:
# Reshape the long result into a wide table: one row per compound name,
# one column per adduct, each cell holding the corresponding adduct mass.
adducts.pivot(
    index='Name',
    columns='adduct',
    values='adduct mass',
)

adduct,2M+ClO3,2M+ClO4,2M+H,2M+H+CH3OH,2M+K,2M+KSO4,2M+NH4,2M+Na,2M-2H+3Na,2M-2H+K,...,M-H-NH3,M-H-NO,M-H-NO4S,M-H-O,M-H-O2S,M-H-O3S,M-H-ON,M-H-SO2,M-H-SO3,M-SCCl2F
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aconitine,1373.583968,1389.578882,1291.637099,1323.663314,1329.592981,1425.545807,1308.663648,1313.619043,1357.582932,1327.578428,...,627.281086,614.309646,,628.31272,,,614.309646,,,
Artabotrine,765.279562,781.274476,683.332693,715.358908,721.288574,817.241401,700.359242,705.314637,749.278526,719.274021,...,323.128883,310.157443,,324.160517,,,310.157443,,,
Skimmianine,601.123061,617.117976,519.176192,551.202407,557.132074,653.084901,536.202741,541.158137,585.122025,555.117521,...,241.050632,228.079193,,242.082267,,,228.079193,,,
Speciozine,1037.384421,1053.379335,955.437552,987.463767,993.393433,1089.34626,972.464101,977.419496,1021.383385,991.37888,...,459.181312,446.209873,,460.212947,,,446.209873,,,
