# Prepare authentic library as JSON empCpds 

Import csv library file and convert to JSON.

A master compound table is used for referring to compounds. It can be updated from other sources, as long as the compound IDs match.

Format for compound table: [{'identifier': 'HMDB0014869', ...}, ...]

Format for authentic library: {
    'method': 'HILIC_pos', 
    'method_description': '',
    'list_cpds': '',
    'list_features': [{'mz': 999.9901, 'rtime': 55.55, 'ion_relation': '','cpd': '', 'name': ''}, 
    ...],
}

In [19]:
import json
import csv

# Proton mass (presumably in Da — confirm); calculate_mz adds it to or subtracts it
# from the neutral monoisotopic mass for the M+H+ and M-H- ion relations.
PROTON = 1.007276466812

# lib prepared by MT, SZ, JMM
# NOTE(review): absolute path on the author's machine — adjust before re-running elsewhere.
src_csv = '/Users/lish/li.github/AuthStdLibrary/l1b_library_11_06_2024.csv'

In [14]:
# Read the whole library into memory as a list of dicts (one per CSV row, keyed by
# the header names). The context manager closes the file handle once the rows are
# materialized, and newline='' lets the csv module handle line endings itself, as
# the csv documentation requires for files passed to a reader.
with open(src_csv, newline='') as csv_file:
    cpdlib = list(csv.DictReader(csv_file))
cpdlib[0]

{'name': 'Nateglinide',
 'group': 'Sub_1',
 'mass': '317.1990937',
 'file': '/Users/mitchjo/AuthStdLibrary/Box/MetaSci_Plus_BOX6.xlsx',
 'log_p': '3.2',
 'No.': '1.0',
 'identifier': 'HMDB0014869',
 'molecular_formula_desalt': 'C19H27NO3',
 'box': '6',
 'row': 'A',
 'column': '1',
 'hmdb_mono_mass': '317.1990937',
 'box_no': '6',
 'sgroup_no': '1',
 'HILIC_neg': '',
 'HILIC_pos': '',
 'RP_neg': '30.7',
 'RP_pos': '47.25',
 'Unnamed: 8': '',
 'Monoisotopic_Mass': '',
 'NO.': '',
 'molecular_formula_Desalt': ''}

In [15]:
# Inspect another record (index 22) to see how the per-method retention-time columns are filled.
cpdlib[22]

{'name': 'Cyclohexanecarboxylic Acid',
 'group': 'Sub_1',
 'mass': '128.0837296',
 'file': '/Users/mitchjo/AuthStdLibrary/Box/MetaSci_Plus_BOX6.xlsx',
 'log_p': '1.9',
 'No.': '23.0',
 'identifier': 'HMDB0031342',
 'molecular_formula_desalt': 'C7H12O2',
 'box': '6',
 'row': 'C',
 'column': '3',
 'hmdb_mono_mass': '128.0837296',
 'box_no': '6',
 'sgroup_no': '1',
 'HILIC_neg': '',
 'HILIC_pos': '27.86',
 'RP_neg': '27.91',
 'RP_pos': '',
 'Unnamed: 8': '',
 'Monoisotopic_Mass': '',
 'NO.': '',
 'molecular_formula_Desalt': ''}

In [None]:
# Minimal compound table: one record per library row. It can be extended later
# with further HMDB properties.
cpdTable = [
    {
        'identifier': row['identifier'],
        'name': row['name'],
        'neutral_formula': row['molecular_formula_desalt'],
        # An empty mass cell is stored as None (JSON null) instead of being parsed.
        'Monoisotopic_Mass': float(row['hmdb_mono_mass']) if row['hmdb_mono_mass'] != '' else None,
    }
    for row in cpdlib
]

# Written to the current working directory; the per-method libraries refer to
# this file name in their 'list_cpds' field.
cpdTable_output = 'compoundTable_LiLab_library_11_06_2024.json'
with open(cpdTable_output, 'w') as out:
    json.dump(cpdTable, out, indent=4)

In [24]:
# l1b_library_11_06_2024.csv should have ion_relation, as M+Na etc possible
# Collected features per method name; filled by the loop over methods below.
data = {}
# Because the CSV has no ion_relation column, each retention time is assumed to
# belong to M+H+ for the *_pos methods and M-H- for the *_neg methods.
assumed_ion_relation = {
    'HILIC_pos' : 'M+H+',
    'HILIC_neg' : 'M-H-',
    'RP_pos' : 'M+H+',
    'RP_neg' : 'M-H-',
}
def calculate_mz(mono_mass, ion_relation):
    """Return the m/z for a neutral monoisotopic mass under the given ion relation.

    Args:
        mono_mass: Neutral monoisotopic mass of the compound.
        ion_relation: Either 'M+H+' (PROTON is added) or 'M-H-' (PROTON is
            subtracted).

    Returns:
        mono_mass shifted by the module-level PROTON constant.

    Raises:
        ValueError: If ion_relation is not one of the two supported values.
    """
    if ion_relation == 'M-H-':
        return mono_mass - PROTON
    if ion_relation == 'M+H+':
        return mono_mass + PROTON
    # Anything else (e.g. sodium adducts) is not supported.
    raise ValueError(f"Unknown ion relation: {ion_relation}")
    
# Build one feature list per method. A compound contributes a feature to a method
# only if the CSV has a non-blank retention time in that method's column.
for method in ('HILIC_pos', 'HILIC_neg', 'RP_pos', 'RP_neg'):
    ion_relation = assumed_ion_relation[method]
    list_features = []
    for entry in cpdlib:
        raw_rtime = entry[method]
        if not raw_rtime.strip():
            # No retention time recorded for this compound under this method.
            continue
        rtime = float(raw_rtime)
        list_features.append({
            'mz': calculate_mz(float(entry['mass']), ion_relation),
            'rtime': rtime,
            'ion_relation': ion_relation,
            'cpd': entry['identifier'],
            'name': entry['name'],
        })

    # Report how many features were collected for this method.
    print(len(list_features))
    data[method] = list_features


405
514
431
631


In [26]:
# Write one authentic-library JSON file per method into the current directory.
for method, features in data.items():
    authentic_lib_ = {
        'method': method,
        'method_description': '',
        # File name of the compound table that the 'cpd' identifiers refer to.
        'list_cpds': cpdTable_output,
        'list_features': features,
    }
    outfile = f'authenticLib_LiLab_{method}_11_06_2024.json'
    with open(outfile, 'w') as f:
        json.dump(authentic_lib_, f, indent=4)

# Conclusion

Exported authentic lib from `AuthStdLibrary/l1b_library_11_06_2024.csv` to 
one compound table and 4 authentic libs.

The compound table can be extended with more properties.

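`AuthStdLibrary/l1b_library_11_06_2024.csv` should be updated to include an `ion_relation` column; the libraries exported here assume `M+H+` for positive-mode and `M-H-` for negative-mode methods.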

Use these files for mummichog development for now. 