# Correct Ph031

Change all relevant entries related to initiator Ph031 in the virtuallibrary table.
Only products A-D, F, and G are affected.

- Add a comment about the side product
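- Append the formula of the +H2O side product (molecular_formula_1 + H2O) to molecular_formula_alt
- Append the mass of the +H2O side product (lcms_mass_1 + 18.0106) to lcms_mass_alt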

In [None]:
import copy
import re
import sqlite3

import pandas as pd

In [None]:
# Open the project's SQLite database. The bare `con` on the last line is
# there so the notebook shows the connection object as the cell output.
db_path = '../../data/db/50k_project.db'
con = sqlite3.connect(db_path)
con

In [None]:
# Load every virtuallibrary row whose initiator_long is 'Ph031' and whose
# type is one of A, B, C, D, F, G; the trailing `df` displays the result.
ph031_query = (
    "SELECT * FROM virtuallibrary WHERE initiator_long = 'Ph031' AND type IN ('A', 'B', 'C', 'D', 'F', 'G')"
)
df = pd.read_sql_query(ph031_query, con)
df

In [None]:
# Set the comment column of every selected row to the side-product note.
# NOTE(review): this replaces whatever value the comment column held for
# these rows -- confirm that no existing comments need to be preserved.
side_product_comment = 'oxazoline->amide side product for Ph031'
df['comment'] = side_product_comment

In [None]:
# some helper functions
def parse_formula(formula: str) -> dict:  # Formula Parsing, originally by Aditya Matam
    """Parse a molecular formula string into element counts.

    Supports element symbols with optional counts (``C6H12O6``) and
    parenthesised groups with an optional multiplier, including repeated and
    nested groups (``(CH3)2(CH2)3``, ``C(C(CH3)3)2``). Characters that are
    neither an element symbol, a digit following one, nor a parenthesis are
    ignored.

    Args:
        formula: The formula to parse, e.g. ``"Ca(OH)2"``.

    Returns:
        A dict mapping each element symbol to its total atom count, with keys
        in order of first appearance in ``formula``.

    Raises:
        ValueError: If the parentheses in ``formula`` are unbalanced.
    """
    # A token is either an element with an optional count, an opening
    # parenthesis, or a closing parenthesis with an optional multiplier.
    token_pattern = re.compile(r"([A-Z][a-z]?)(\d*)|(\()|(\))(\d*)")

    # One counting dict per currently open group; index 0 is the top level.
    stack = [{}]
    for match in token_pattern.finditer(formula):
        elem, count, opening, closing, mul = match.groups()
        if elem:
            current = stack[-1]
            current[elem] = current.get(elem, 0) + int(count or 1)
        elif opening:
            stack.append({})
        else:
            if len(stack) == 1:
                raise ValueError(f"Unbalanced ')' in formula: {formula!r}")
            group = stack.pop()
            factor = int(mul or 1)
            parent = stack[-1]
            # Sum into the enclosing group (never overwrite), so repeated
            # groups and elements that also occur outside the group add up.
            for key, value in group.items():
                parent[key] = parent.get(key, 0) + value * factor

    if len(stack) != 1:
        raise ValueError(f"Unbalanced '(' in formula: {formula!r}")
    return stack[0]

def formula_to_string(formDict):
    """Render an element-count mapping as a formula string.

    Elements are written in the mapping's iteration order. A count of 1 is
    written as the bare symbol, a count above 1 as symbol followed by the
    count, and any other count is left out entirely.
    """
    return ''.join(
        element if count == 1 else f'{element}{count}'
        for element, count in formDict.items()
        if count >= 1
    )

def add_formulae(formula1, formula2):
    """Return the element-wise sum of two element-count mappings.

    Args:
        formula1: Mapping of element symbol to count; it is not modified.
        formula2: Mapping of element symbol to count to add on top.

    Returns:
        A new mapping holding, for every element in either input, the sum of
        its counts. Elements that occur only in ``formula2`` are included.
    """
    result = copy.deepcopy(formula1)  # copy so the first formula is left untouched
    for key, value in formula2.items():
        # .get() so that elements missing from formula1 start at 0 instead of
        # raising KeyError (e.g. adding H2O to a formula without oxygen).
        result[key] = result.get(key, 0) + value
    return result

def string_formula_addition(formula1, formula2):
    """Add two formula strings and return the summed formula as a string.

    Both inputs are parsed with parse_formula, combined with add_formulae and
    rendered back with formula_to_string.
    """
    first = parse_formula(formula1)
    second = parse_formula(formula2)
    combined = add_formulae(first, second)
    return formula_to_string(combined)

In [None]:
# Build the formula of the +H2O side product (molecular_formula_1 + H2O) and
# append it to the existing molecular_formula_alt entry, comma-separated.
# Mapping over the single column instead of a row-wise DataFrame.apply also
# works when the query returned no rows (row-wise apply on an empty frame
# hands back a DataFrame, which cannot be assigned to one column).
# NOTE(review): re-running this cell after the database update below would
# append the same formula a second time -- confirm the notebook is run once.
df['new_formula'] = df['molecular_formula_1'].map(
    lambda formula: string_formula_addition(formula, 'H2O')
)
# filter(None, ...) drops an empty/None existing entry so the joined result
# never starts with a stray comma.
df['molecular_formula_alt_new'] = [
    ','.join(filter(None, (a, b)))
    for a, b in zip(df['molecular_formula_alt'], df['new_formula'])
]
df[['molecular_formula_alt', 'new_formula', 'molecular_formula_alt_new']]

In [None]:
# Compute the mass of the +H2O side product (lcms_mass_1 + 18.0106, formatted
# to four decimals) and append it to the existing lcms_mass_alt entry,
# comma-separated. 18.0106 is presumably the monoisotopic mass of H2O --
# TODO confirm.
# Working on the column directly instead of a row-wise DataFrame.apply also
# works when the query returned no rows.
# NOTE(review): re-running this cell after the database update below would
# append the same mass a second time -- confirm the notebook is run once.
df['new_mass'] = (df['lcms_mass_1'] + 18.0106).map('{:.4f}'.format)
# new_mass already holds strings, so no further conversion is needed;
# filter(None, ...) drops an empty/None existing entry before joining.
df['lcms_mass_alt_new'] = [
    ','.join(filter(None, (a, b)))
    for a, b in zip(df['lcms_mass_alt'], df['new_mass'])
]
df[['lcms_mass_alt', 'new_mass', 'lcms_mass_alt_new']]

In [None]:
# Write the new comment and the extended alt columns back to the
# virtuallibrary table, one UPDATE per row, matched on id.
update_sql = "UPDATE virtuallibrary SET comment = ?, molecular_formula_alt = ?, lcms_mass_alt = ? WHERE id = ?"
update_rows = zip(
    df['comment'],
    df['molecular_formula_alt_new'],
    df['lcms_mass_alt_new'],
    df['id'],
)
# Using the connection as a context manager commits the transaction when the
# block succeeds and rolls it back if an exception is raised.
with con:
    con.executemany(update_sql, update_rows)
