## Targets from README.md
3. Inspect all reactions and add meta-data column with index of metal atom (s) and oxidation states.
4. Write routine to detect/create possible ligand swaps given a reaction complex (init vs final). Don't swap any ligands involved in bond breaking/forming. Make sure swappable ligands are only up to denticity 2. (Check that charge detection is working correctly for all ligands in the complex.)

In [2]:
from architector import convert_io_molecule, view_structures
import architector.io_ptable as io_ptable
import pathlib
import mendeleev
import pandas as pd
import numpy as np

In [3]:
metaldf = pd.read_csv('rxn_m_swap_df.csv')

In [4]:
metaldf.columns

Index(['Unnamed: 0', 'metal', 'ox', 'uhf'], dtype='object')

In [5]:
df = pd.read_csv('../omol_mc_reactions.csv')

In [6]:
sdfs = [x for x in pathlib.Path('../reaction_complexes').rglob('*.sdf')]

In [7]:
len(sdfs)

126

In [8]:
# Force dataframe together: attach the matching SDF path to every reaction row.
# An SDF matches a row when its file name, with '.sdf' removed, equals the
# row's 'Reaction_name'. The first match in `sdfs` order wins.
sdf_index_by_name = {}
for j, sdf in enumerate(sdfs):
    # setdefault keeps the earliest index when two files share a name.
    sdf_index_by_name.setdefault(sdf.name.replace('.sdf', ''), j)

sdf_paths = []
js = []  # indices into `sdfs` that were matched to a reaction row
missing = []
for rxn_name in df['Reaction_name']:
    j = sdf_index_by_name.get(rxn_name)
    if j is None:
        missing.append(rxn_name)
        continue
    sdf_paths.append(sdfs[j])
    js.append(j)
if missing:
    # Fail with the offending names instead of an opaque length-mismatch
    # error from the column assignment below.
    raise ValueError('No SDF file found for reaction(s): '
                     + ', '.join(str(name) for name in missing))
df['sdf_path'] = sdf_paths

In [9]:
# Print every SDF that was not matched to a reaction row.
# The one left over is a duplicate of ROST57 with a metal swap -> skip it.
for j, sdf in enumerate(sdfs):
    if j not in js:
        print(sdf)

../reaction_complexes/ROST58.sdf


In [10]:
def load_split_sdf(sdfpath):
    """Split an SDF file into one string per structure using the $$$$ delimiter.

    A line containing ``$$$$`` closes the current structure and is kept as its
    last line. The very first line of the file is always treated as content,
    never as a delimiter.

    Args:
        sdfpath (str or path-like): path to the SDF file to read.

    Returns:
        list of str : the structures in file order (for the reaction files
            used here: 0-initial, 1-final). Fragments that are at most one
            line long, or that contain only whitespace (e.g. blank lines
            after the last ``$$$$``), are dropped.
    """
    with open(sdfpath, 'r') as file1:
        lines = file1.readlines()
    records = []
    current = []
    for i, line in enumerate(lines):
        current.append(line)
        # A delimiter closes the record; line 0 never counts as a delimiter.
        if i > 0 and '$$$$' in line:
            records.append(current)
            current = []
    # Whatever follows the last delimiter (possibly nothing at all).
    records.append(current)
    out_sdfs = [''.join(rec) for rec in records if len(rec) > 1]
    # Trailing blank lines must not be returned as an extra (empty) structure.
    return [sdf for sdf in out_sdfs if sdf.strip()]

In [11]:
def get_metal_tag_oxs(sdf):
    """Get the metal/ox tags from the sdf just in case.

    Every line containing ``VAL=`` whose fourth whitespace-separated field is
    in ``io_ptable.all_metals`` is read as a tagged metal: the third field
    minus one is stored as the atom index and the integer after ``VAL=`` as
    the oxidation state. If no such line is found, the structure is loaded
    with ``convert_io_molecule``, the metals are taken from ``find_metals()``
    and every oxidation state is reported as 0.

    Args:
        sdf (str): text of a single SDF structure.

    Returns:
        tuple : (metals, oxs, inds) - metal symbols, oxidation states and
            atom indices, in matching order.
    """
    metals = []
    oxs = []
    inds = []
    for line in sdf.splitlines():
        if 'VAL=' not in line:
            continue
        sline = line.split()
        # Need at least four fields to read an index and an element symbol.
        if len(sline) < 4 or sline[3] not in io_ptable.all_metals:
            continue
        # Read the field that actually carries VAL= rather than assuming it
        # is the last one; other keyword=value properties may follow it.
        val_field = next(tok for tok in sline if 'VAL=' in tok)
        inds.append(int(sline[2]) - 1)
        metals.append(sline[3])
        oxs.append(int(val_field.split('VAL=', 1)[1]))
    if len(metals) == 0:
        # No tagged metals: fall back to detection on the parsed molecule.
        mol = convert_io_molecule(sdf)
        inds = mol.find_metals()
        metals = [mol.ase_atoms.get_chemical_symbols()[x] for x in inds]
        oxs = [0] * len(metals)
    return metals, oxs, inds

In [12]:
sdf_lst = load_split_sdf(sdfs[0])
get_metal_tag_oxs(sdf_lst[0])

(['Al', 'Ti'], [3, 4], [0, 10])

In [13]:
df.iloc[5]

Reaction_name                              MOR6
Reactant(s)                             ED01+H2
Product(s)                                 PR06
sdf_path         ../reaction_complexes/MOR6.sdf
Name: 5, dtype: object

In [14]:
def rxn_breakdown(rxn, debug=False):
    """Break a reaction SDF (initial + final structure) down into a reaction dictionary.

    Relies on the module-level ``metaldf`` table for the candidate oxidation
    states of each metal.

    Args:
        rxn (str): path to sdf file of reaction
        debug (bool, optional): print every entry of the ligand breakdown of
            the initial structure. Defaults to False.

    Returns:
        dict : reaction dictionary with keys 'metals', 'metal_inds',
            'possible_metal_oxs', 'init_struct', 'final_struct',
            'swappable_ligs', 'pred_lig_charges' and 'pred_lig_smis'.
    """
    # Entry 0 is the initial structure, entry 1 the final structure.
    structures = load_split_sdf(rxn)
    # Metal symbols and indices; the oxidation tags from the file are not used.
    metals, _, inds = get_metal_tag_oxs(structures[0])
    # Candidate oxidation states are looked up in the metal/ox table instead.
    possible_oxs = []
    for symbol in metals:
        possible_oxs.append(metaldf[metaldf.metal == symbol].ox.values)
    init_mol = convert_io_molecule(structures[0])
    fin_mol = convert_io_molecule(structures[1])
    # Bonds present only in the initial structure were broken ...
    bonds_broken = []
    for bond in init_mol.BO_dict:
        if bond not in fin_mol.BO_dict:
            bonds_broken.append(bond)
    # ... flattened to atom indices and shifted from 1-indexed to 0-indexed.
    bb = np.array(bonds_broken).ravel() - 1
    # Bonds present only in the final structure were formed.
    bonds_formed = []
    for bond in fin_mol.BO_dict:
        if bond not in init_mol.BO_dict:
            bonds_formed.append(bond)
    bf = np.array(bonds_formed).ravel() - 1
    # Ligand breakdown of the initial structure (smiles, charges, indices).
    init_ld = init_mol.split_ligs()
    if debug:
        for name in init_ld:
            print(name,':',init_ld[name])
    # A ligand is swappable when
    #   1. none of its atoms takes part in a bond formed/broken by the reaction
    #   2. it is mono- or bidentate.
    swappable_ligs = []
    for n_lig, lig_atoms in enumerate(init_ld['original_lig_inds']):
        if (len(np.intersect1d(bf,lig_atoms)) > 0) or (len(np.intersect1d(bb,lig_atoms)) > 0):
            continue
        coord_atoms = init_ld['lig_metal_coordatoms'][n_lig]
        # Count each coordinating atom once.
        denticity = len(np.unique(coord_atoms))
        if 0 < denticity < 3:
            swappable_ligs.append({
                'smiles': init_ld['lig_smiles'][n_lig],
                'inds': lig_atoms,
                # Indices in ligand that coordinate to metal
                'coordlist': coord_atoms,
                'denticity': denticity,
                # Metal indice(s) the ligand is bound to.
                'metals_bound': init_ld['bound_metal_inds'][n_lig],
                'charge': init_ld['lig_charges'][n_lig],
            })
        elif denticity == 0:
            # Non-reacting fragment that is not coordinated to any metal.
            print('bond_breaking:',bb,'ligand_indices:',lig_atoms)
            print('bond_forming:',bf,'ligand_indices:',lig_atoms)
    out = dict()
    out['metals'] = metals
    out['metal_inds'] = inds
    out['possible_metal_oxs'] = possible_oxs
    out['init_struct'] = structures[0]
    out['final_struct'] = structures[1]
    out['swappable_ligs'] = swappable_ligs
    out['pred_lig_charges'] = init_ld['lig_charges']
    out['pred_lig_smis'] = init_ld['lig_smiles']
    return out


In [15]:
n = 21
rxn = df.iloc[n]['sdf_path']
out0 = rxn_breakdown(rxn,debug=True)

metal : ['Ir', 'Ir']
metal_ind : [2, 3]
lig_charges : [0, -1, -1, 0, 0, 0, 0]
lig_coord_ats : ['S,S', 'S,S', 'C', 'C', 'C', 'C']
original_lig_inds : [array([0, 1]), array([ 4,  8, 10, 11, 12, 22, 23, 24, 25, 26, 27, 28, 29, 30]), array([ 5,  9, 13, 14, 15, 31, 32, 33, 34, 35, 36, 37, 38, 39]), array([ 6, 19]), array([ 7, 18]), array([16, 20]), array([17, 21])]
mapped_smiles_inds : [[0, 1], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [0, 1], [0, 1], [0, 1], [0, 1]]
bound_metal_inds : [[], [2, 3], [2, 3], [3], [3], [2], [2]]
lig_smiles : ['II', '[S-]C(C)(C)C', '[S-]C(C)(C)C', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]']
lig_metal_coordatoms : [[], [0, 0], [0, 0], [0], [0], [0], [0]]


In [16]:
n = 10
rxn = df.iloc[n]['sdf_path']
out0 = rxn_breakdown(rxn,debug=True)

metal : Pd
metal_ind : 28
lig_charges : [0, 0, -1, -1]
lig_coord_ats : ['N,N', 'C', 'I']
original_lig_inds : [array([0, 1, 2, 3, 4, 5, 6, 7]), array([ 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
       25, 26, 27]), array([29, 30, 31, 32]), array([33])]
mapped_smiles_inds : [[0, 1, 2, 3, 4, 5, 6, 7], [5, 0, 1, 2, 15, 12, 13, 14, 4, 3, 6, 11, 10, 8, 9, 19, 18, 7, 16, 17], [0, 1, 2, 3], [0]]
bound_metal_inds : [[], [28], [28], [28]]
lig_smiles : ['CC', 'c1ccc(nc1)c1ccccn1', '[CH3-]', '[I-]']
lig_metal_coordatoms : [[], [4, 11], [0], [0]]


In [17]:
n = 0
rxn = df.iloc[n]['sdf_path']
out0 = rxn_breakdown(rxn,debug=True)

metal : Cr
metal_ind : 2
lig_charges : [0, 0, 0, 0, 0, 0]
lig_coord_ats : ['C', 'C', 'C', 'C', 'C']
original_lig_inds : [array([0, 1]), array([3, 4]), array([5, 6]), array([7, 8]), array([ 9, 10]), array([11, 12])]
mapped_smiles_inds : [[2, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]
bound_metal_inds : [[], [2], [2], [2], [2], [2]]
lig_smiles : ['C#[O]', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]']
lig_metal_coordatoms : [[], [0], [0], [0], [0], [0]]


In [18]:
def view_initmol(rxn_dict):
    """Show the initial structure with labels that make it easier to check by eye.

    1. Each metal center is labelled `Chemical_Symbol` + `Possible_Ox_States` + `_LC:` + `Sum_of_Ligand_Charges`.
    2. The first atom of each swappable ligand is labelled `Index` + `_C:` + `LigandCharge` + `_D:` + `LigandDenticity`.
    3. Every other atom is left unlabelled.

    Metal labels are written last, so they win if an index is labelled twice.

    Args:
        rxn_dict (dictionary): Reaction dictionary.
    """
    init_mol = convert_io_molecule(rxn_dict['init_struct'])
    n_atoms = len(init_mol.ase_atoms)
    labelinds = [None for _ in range(n_atoms)]
    for lig_no, lig in enumerate(rxn_dict['swappable_ligs']):
        charge_tag = '_C:' + str(lig['charge'])
        dent_tag = '_D:' + str(lig['denticity'])
        labelinds[lig['inds'][0]] = str(lig_no) + charge_tag + dent_tag
    for m_no, metal in enumerate(rxn_dict['metals']):
        ox_tag = str(rxn_dict['possible_metal_oxs'][m_no])
        lig_charge_tag = '_LC:' + str(np.sum(rxn_dict['pred_lig_charges']))
        labelinds[rxn_dict['metal_inds'][m_no]] = metal + ox_tag + lig_charge_tag
    view_structures(init_mol,labelinds=labelinds)

In [19]:
n = 0
rxn = df.iloc[n]['sdf_path']
out0 = rxn_breakdown(rxn)
view_initmol(out0)

# ## Edits, if any required done between steps.

# Checking for many things:
# 1. Ligand Charges are sensible / correctly assigned for swappable ligands. If the ligand smiles are wrong that doesn't matter as much as the charge state.
# 2. Metal oxidation states are sensible for the complex. Flag when total charge of ligands + metal center are less than -2 or greater than +4.
# 3. Ligand denticity and location flagging are correct for reactions. Make sure that reacting ligands are not swapped.
# 4. Symmetry of ligands assuming only a single ligand swap will be performed at a time. E.g. Trans vs 4*Cis in octahedral opposite a new binding site only gives
# 2 unique ligand replacement locations 

# Ways to edit:

# ## Metal Oxs edit
out0['possible_metal_oxs'] = [[2,3]]

# ## Swappable ligand charges change
# out['swappable_ligs'][i]['charge'] = []
# out['swappable_ligs'][i]['charge'] = []

# Reduce redundant ligands for swaps with only single swap
del out0['swappable_ligs'][4]
del out0['swappable_ligs'][3]
del out0['swappable_ligs'][0]

# ## Ligands Total Charge change (in case non-swappable ligand charge is tagged incorrectly)
# out['pred_lig_charges'] = []
view_initmol(out0)

In [20]:
n = 1
rxn = df.iloc[n]['sdf_path']
out1 = rxn_breakdown(rxn)
view_initmol(out1)
# Reduce redundant ligands for swaps with only single swap
del out1['swappable_ligs'][3]
del out1['swappable_ligs'][2]
view_initmol(out1)
# All good

In [21]:
n = 2
rxn = df.iloc[n]['sdf_path']
out2 = rxn_breakdown(rxn)
view_initmol(out2)
# Reduce redundant ligands for swaps with only single swap
del out2['swappable_ligs'][2]
del out2['swappable_ligs'][1]
view_initmol(out2)
# All good

In [22]:
n = 3
rxn = df.iloc[n]['sdf_path']
out3 = rxn_breakdown(rxn)
view_initmol(out3)

# ## Ligands Total Charge change
# Cp -1, ethyne -1 (*2) = -3
out3['pred_lig_charges'] = [-3]
view_initmol(out3)

In [23]:
n = 4
rxn = df.iloc[n]['sdf_path']
out4 = rxn_breakdown(rxn)
view_initmol(out4)

# ## Edits, if any
# ## Metal Oxs edit to keep charge lt +5.
out4['possible_metal_oxs'] = [[2,3,4]]

# Reduce redundant ligands for swaps with only single swap
del out4['swappable_ligs'][3]

view_initmol(out4)

In [24]:
n = 5
rxn = df.iloc[n]['sdf_path']
out5 = rxn_breakdown(rxn)
view_initmol(out5)

# ## Edits, if any
# ## Metal Oxs edit to keep charge lt +5
out5['possible_metal_oxs'] = [[2,3]]

# Reduce redundant ligands for swaps with only single swap
del out5['swappable_ligs'][4]
del out5['swappable_ligs'][3]
del out5['swappable_ligs'][2]

view_initmol(out5)

In [25]:
n = 6
rxn = df.iloc[n]['sdf_path']
out6 = rxn_breakdown(rxn)
view_initmol(out6)

# ## Edits, if any
# ## Metal Oxs edit to keep charge lt +5
out6['possible_metal_oxs'] = [[4]]


# Reduce redundant ligands for swaps with only single swap
del out6['swappable_ligs'][4]
del out6['swappable_ligs'][2]

view_initmol(out6)

In [26]:
n = 7
rxn = df.iloc[n]['sdf_path']
out7 = rxn_breakdown(rxn)
view_initmol(out7)

# Reduce redundant ligands for swaps with only single swap
del out7['swappable_ligs'][3]
view_initmol(out7)

In [27]:
n = 8
rxn = df.iloc[n]['sdf_path']
out8 = rxn_breakdown(rxn)
view_initmol(out8)
# Reduce redundant ligands for swaps with only single swap
del out8['swappable_ligs'][2]
view_initmol(out8)

In [28]:
n = 9
rxn = df.iloc[n]['sdf_path']
out9 = rxn_breakdown(rxn)
view_initmol(out9)
# All good

In [29]:
n = 10
rxn = df.iloc[n]['sdf_path']
out10 = rxn_breakdown(rxn)
view_initmol(out10)
# All good

In [30]:
n = 11
rxn = df.iloc[n]['sdf_path']
out11 = rxn_breakdown(rxn)
view_initmol(out11)

# ## Edits, if any
# ## Metal Oxs edit to keep lt +4 charge total.
out11['possible_metal_oxs'] = [[2,3]]

# Reduce redundant ligands for swaps with only single swap
del out11['swappable_ligs'][4]
del out11['swappable_ligs'][3]
del out11['swappable_ligs'][2]

view_initmol(out11)

In [31]:
n = 12
rxn = df.iloc[n]['sdf_path']
out12 = rxn_breakdown(rxn)
view_initmol(out12)
# All good

In [32]:
n = 13
rxn = df.iloc[n]['sdf_path']
out13 = rxn_breakdown(rxn)
view_initmol(out13)
# All good

In [33]:
n = 14
rxn = df.iloc[n]['sdf_path']
out14 = rxn_breakdown(rxn)
view_initmol(out14)
# All good

In [34]:
n = 15
rxn = df.iloc[n]['sdf_path']
out15 = rxn_breakdown(rxn)
view_initmol(out15)

print(out15['pred_lig_smis'])
# ## Edits, if any
# ## Metal Oxs edit to keep above -3.
out15['possible_metal_oxs'] = [[4]]

# ## Ligands Total Charge change
# N-C[-2]-N -2 (*2), Cl-, C6-, = -6
out15['pred_lig_charges'] = [-6]
view_initmol(out15)

['[Cl-]', 'CC(N1C=CN([C-2]1)C(C)(C)C)(C)C', 'Cc1cc[c-]cc1', 'CC(N1C=CN([C]1)C(C)(C)C)(C)C']


In [35]:
n = 16
rxn = df.iloc[n]['sdf_path']
out16 = rxn_breakdown(rxn)
view_initmol(out16)

print(out16['pred_lig_smis'])
# All good

['CCN(Cc1cccc(n1)C=[P](C(C)(C)C)C(C)(C)C)CC', '[C-]#[O+]', '[H-]', 'CO']


In [36]:
n = 17
rxn = df.iloc[n]['sdf_path']
out17 = rxn_breakdown(rxn)
view_initmol(out17)

# Reduce redundant ligands for swaps with only single swap
del out17['swappable_ligs'][1]

view_initmol(out17)
# All good

In [37]:
n = 18
rxn = df.iloc[n]['sdf_path']
out18 = rxn_breakdown(rxn)
view_initmol(out18)
# Reduce redundant ligands for swaps with only single swap
del out18['swappable_ligs'][1]
view_initmol(out18)

In [38]:
n = 19
rxn = df.iloc[n]['sdf_path']
out19 = rxn_breakdown(rxn)
view_initmol(out19)
# Reduce redundant ligands for swaps with only single swap
del out19['swappable_ligs'][1]
view_initmol(out19)

In [39]:
n = 20
rxn = df.iloc[n]['sdf_path']
out20 = rxn_breakdown(rxn)
view_initmol(out20)

# Reduce redundant ligands for swaps with only single swap
del out20['swappable_ligs'][1]
view_initmol(out20)

In [40]:
n = 21
rxn = df.iloc[n]['sdf_path']
out21 = rxn_breakdown(rxn)
view_initmol(out21)

# ## Metal Oxs edit
# Charges should be less than +4
out21['possible_metal_oxs'] = [[3],[3]]

# Reduce redundant ligands for swaps with only single swap
del out21['swappable_ligs'][5]
del out21['swappable_ligs'][4]
del out21['swappable_ligs'][3]
del out21['swappable_ligs'][1]


view_initmol(out21)

In [41]:
n = 22
rxn = df.iloc[n]['sdf_path']
out22 = rxn_breakdown(rxn)
view_initmol(out22)
# Reduce redundant ligands for swaps with only single swap
del out22['swappable_ligs'][1]
view_initmol(out22)

In [42]:
n = 23
rxn = df.iloc[n]['sdf_path']
out23 = rxn_breakdown(rxn)
view_initmol(out23)
# Reduce redundant ligands for swaps with only single swap
del out23['swappable_ligs'][1]
view_initmol(out23)

In [43]:
n = 24
rxn = df.iloc[n]['sdf_path']
out24 = rxn_breakdown(rxn)
view_initmol(out24)
# Reduce redundant ligands for swaps with only single swap
del out24['swappable_ligs'][1]
view_initmol(out24)

In [44]:
n = 25
rxn = df.iloc[n]['sdf_path']
out25 = rxn_breakdown(rxn)
view_initmol(out25)

# ## Metal Oxs edit
# Charges should be gt -2
out25['possible_metal_oxs'] = [[4]]

# Reduce redundant ligands for swaps with only single swap
del out25['swappable_ligs'][1]

view_initmol(out25)

In [45]:
n = 26
rxn = df.iloc[n]['sdf_path']
out26 = rxn_breakdown(rxn)
view_initmol(out26)
# All good

In [46]:
n = 27
rxn = df.iloc[n]['sdf_path']
out27 = rxn_breakdown(rxn)
view_initmol(out27)
# All good

In [47]:
n = 28
rxn = df.iloc[n]['sdf_path']
out28 = rxn_breakdown(rxn)
view_initmol(out28)
# All good

In [48]:
n = 29
rxn = df.iloc[n]['sdf_path']
out29 = rxn_breakdown(rxn)
view_initmol(out29)
# Reduce redundant ligands for swaps with only single swap
del out29['swappable_ligs'][1]

view_initmol(out29)

In [49]:
n = 30
rxn = df.iloc[n]['sdf_path']
out30 = rxn_breakdown(rxn)
view_initmol(out30)
# Reduce redundant ligands for swaps with only single swap
del out30['swappable_ligs'][1]

view_initmol(out30)

In [50]:
n = 31
rxn = df.iloc[n]['sdf_path']
out31 = rxn_breakdown(rxn)
view_initmol(out31)
# Reduce redundant ligands for swaps with only single swap
del out31['swappable_ligs'][1]

view_initmol(out31)

In [51]:
n = 32
rxn = df.iloc[n]['sdf_path']
out32 = rxn_breakdown(rxn)
view_initmol(out32)
# Reduce redundant ligands for swaps with only single swap
del out32['swappable_ligs'][3]
del out32['swappable_ligs'][1]
view_initmol(out32)

In [52]:
n = 33
rxn = df.iloc[n]['sdf_path']
out33 = rxn_breakdown(rxn)
view_initmol(out33)
# Reduce redundant ligands for swaps with only single swap
del out33['swappable_ligs'][3]
del out33['swappable_ligs'][1]
view_initmol(out33)

In [53]:
n = 34
rxn = df.iloc[n]['sdf_path']
out34 = rxn_breakdown(rxn)
view_initmol(out34)
# Reduce redundant ligands for swaps with only single swap
del out34['swappable_ligs'][3]
del out34['swappable_ligs'][1]
view_initmol(out34)

In [54]:
n = 35
rxn = df.iloc[n]['sdf_path']
out35 = rxn_breakdown(rxn)
view_initmol(out35)

# All good

In [55]:
n = 36
rxn = df.iloc[n]['sdf_path']
out36 = rxn_breakdown(rxn)
view_initmol(out36)

# All good

In [56]:
n = 37
rxn = df.iloc[n]['sdf_path']
out37 = rxn_breakdown(rxn)
view_initmol(out37)

# ## Metal Oxs edit
# Charges should be less than +4
out37['possible_metal_oxs'] = [[4]]

# Symmetry reduce for single swap
del out37['swappable_ligs'][2]
del out37['swappable_ligs'][1]

view_initmol(out37)

In [57]:
n = 38
rxn = df.iloc[n]['sdf_path']
out38 = rxn_breakdown(rxn)
view_initmol(out38)

# ## Metal Oxs edit
# Charges should be less than +4
out38['possible_metal_oxs'] = [[4]]
# Symmetry reduce for single swap
del out38['swappable_ligs'][2]
del out38['swappable_ligs'][1]

view_initmol(out38)

In [58]:
n = 39
rxn = df.iloc[n]['sdf_path']
out39 = rxn_breakdown(rxn)
view_initmol(out39)
view_structures(out39['final_struct'])

# Symmetry reduce for single swap
del out39['swappable_ligs'][3]
del out39['swappable_ligs'][2]
del out39['swappable_ligs'][1]

view_initmol(out39)

In [59]:
n = 40
rxn = df.iloc[n]['sdf_path']
out40 = rxn_breakdown(rxn)
view_initmol(out40)
view_structures(out40['final_struct'])
# Symmetry reduce for single swap
del out40['swappable_ligs'][3]
del out40['swappable_ligs'][1]
view_initmol(out40)

In [60]:
n = 41
rxn = df.iloc[n]['sdf_path']
out41 = rxn_breakdown(rxn)
view_initmol(out41)
view_structures(out41['final_struct'])
# All good

In [61]:
n = 42
rxn = df.iloc[n]['sdf_path']
out42 = rxn_breakdown(rxn)
view_initmol(out42)
# All good

In [62]:
n = 43
rxn = df.iloc[n]['sdf_path']
out43 = rxn_breakdown(rxn)
view_initmol(out43)

print(out43['pred_lig_smis'])


# ## Ligands Total Charge change
# Cp -1 (*2), H- (*1), O- (*1), CH- , -5 is correct!

# Ox edit to be gt -3.
out43['possible_metal_oxs'] = [[3,4]]

view_initmol(out43)

['[cH-]1cccc1', '[cH-]1cccc1', '[O-]C[CH-]c1ccccc1', '[H-]']


In [63]:
n = 44
rxn = df.iloc[n]['sdf_path']
out44 = rxn_breakdown(rxn)
view_initmol(out44)
# All good

In [64]:
n = 45
rxn = df.iloc[n]['sdf_path']
out45 = rxn_breakdown(rxn)
view_initmol(out45)

# ## Metal Oxs edit
# Charges should be less than +4
out45['possible_metal_oxs'] = [[2,3]]

view_initmol(out45)

In [65]:
n = 46
rxn = df.iloc[n]['sdf_path']
out46 = rxn_breakdown(rxn)
view_initmol(out46)

# ## Metal Oxs edit
# Charges should be less than +4
out46['possible_metal_oxs'] = [[2,3]]

view_initmol(out46)

In [66]:
n = 47
rxn = df.iloc[n]['sdf_path']
out47 = rxn_breakdown(rxn)
view_initmol(out47)

# ## Metal Oxs edit
# Charges should be less than +4
out47['possible_metal_oxs'] = [[2,3]]

view_initmol(out47)

In [67]:
n = 48
rxn = df.iloc[n]['sdf_path']
out48 = rxn_breakdown(rxn)
view_initmol(out48)
# All good

In [68]:
n = 49
rxn = df.iloc[n]['sdf_path']
out49 = rxn_breakdown(rxn)
view_initmol(out49)
# All good

In [69]:
n = 50
rxn = df.iloc[n]['sdf_path']
out50 = rxn_breakdown(rxn)
view_initmol(out50)
# All good

In [70]:
n = 51
rxn = df.iloc[n]['sdf_path']
out51 = rxn_breakdown(rxn)
view_initmol(out51)
# All good

In [71]:
n = 52
rxn = df.iloc[n]['sdf_path']
out52 = rxn_breakdown(rxn)
view_initmol(out52)
# All good

In [72]:
n = 53
rxn = df.iloc[n]['sdf_path']
out53 = rxn_breakdown(rxn)
view_initmol(out53)

# ## Metal Oxs edit
# Charges should be less than +4
out53['possible_metal_oxs'] = [[2,3]]

view_initmol(out53)

In [73]:
n = 54
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out54 = rxn_breakdown(rxn)
view_initmol(out54)
print(out54['pred_lig_smis'])

# Reduce symmetry for single swap
del out54['swappable_ligs'][1]
view_initmol(out54)

Reaction_name                              ROST13
Reactant(s)                           m19+m20+m21
Product(s)                                m22+m23
sdf_path         ../reaction_complexes/ROST13.sdf
Name: 54, dtype: object


['[OH-]', 'O=N/C(=C(\\N=O)/C)/C', 'O/N=C(/C(=N/O)/C)\\C', 'O', '[CH3-]', 'O']


In [74]:
n = 55
rxn = df.iloc[n]['sdf_path']
out55 = rxn_breakdown(rxn)
view_initmol(out55)
view_structures(out55['final_struct'])

# Reduce symmetry for single swap
del out55['swappable_ligs'][4]
del out55['swappable_ligs'][1]
view_initmol(out55)

In [75]:
n = 56
rxn = df.iloc[n]['sdf_path']
out56 = rxn_breakdown(rxn)
view_initmol(out56)
view_structures(out56['final_struct'])
# Reduce symmetry for single swap
del out56['swappable_ligs'][4]
del out56['swappable_ligs'][1]
view_initmol(out56)

In [76]:
n = 57
rxn = df.iloc[n]['sdf_path']
out57 = rxn_breakdown(rxn)
view_initmol(out57)
# Reduce symmetry for single swap
del out57['swappable_ligs'][1]
view_initmol(out57)

In [77]:
n = 58
rxn = df.iloc[n]['sdf_path']
out58 = rxn_breakdown(rxn)
view_initmol(out58)
# All good

In [78]:
n = 59
rxn = df.iloc[n]['sdf_path']
out59 = rxn_breakdown(rxn)
view_initmol(out59)
view_structures(out59['init_struct'],labelinds=True)
view_structures(out59['final_struct'],labelinds=True)

print(out59['pred_lig_smis'])
# Cl- tagged as Cl-3

# Change ligand charge total
out59['pred_lig_charges'] = [-2]

# Reduce Symmetry
del out59['swappable_ligs'][2]
view_initmol(out59)

['[Cl-3]', 'C[c-]1cccc1', '[C-]#[O+]', '[C-]#[O+]', '[C-]#[O+]']


In [79]:
n = 60
rxn = df.iloc[n]['sdf_path']
out60 = rxn_breakdown(rxn)
view_initmol(out60)
# All good

In [80]:
n = 61
rxn = df.iloc[n]['sdf_path']
out61 = rxn_breakdown(rxn)
view_initmol(out61)
# All good

In [81]:
n = 62
rxn = df.iloc[n]['sdf_path']
out62 = rxn_breakdown(rxn)
view_initmol(out62)
# All good

In [82]:
n = 63
rxn = df.iloc[n]['sdf_path']
out63 = rxn_breakdown(rxn)
view_initmol(out63)
# All good

In [83]:
n = 64
rxn = df.iloc[n]['sdf_path']
out64 = rxn_breakdown(rxn)
view_initmol(out64)
# view_structures(out64['final_struct'])

print(out64['pred_lig_charges'])
print(out64['pred_lig_smis'])

# Si ligands = -1 (*2), N ligand = -1, oxygen ligand = +1 -> -2 ligand total charge
out64['pred_lig_charges'] = [-2]
view_initmol(out64)

[3, -1, -1, -1]
['CC[O+]([H+][O+](CC)CC)CC', 'C/C(=C/C(=N/c1c(C)cccc1C)/C)/[N-]c1c(C)cccc1C', '[CH2-][Si](C)(C)C', '[CH2-][Si](C)(C)C']


In [84]:
n = 65
rxn = df.iloc[n]['sdf_path']
out65 = rxn_breakdown(rxn)
view_initmol(out65)
print(out65['pred_lig_smis'])
# This is all fine, surprisingly.

['CP(C)C', 'CP(c1ccccc1[Si-](c1ccccc1P(C)C)c1ccccc1P(C)C)C', 'N#N']


In [85]:
n = 66
rxn = df.iloc[n]['sdf_path']
out66 = rxn_breakdown(rxn)
view_initmol(out66)
# looks good

In [86]:
n = 67
rxn = df.iloc[n]['sdf_path']
out67 = rxn_breakdown(rxn)
view_initmol(out67)
# view_structures(out67['final_struct'])
print(out67['pred_lig_smis'])
# Cl- tagged as Cl-3
# ## Ligands Total Charge change
out67['pred_lig_charges'] = [-3]
view_initmol(out67)

['[Cl-3]', '[cH-]1cccc1', 'O=C(CC(c1[cH-]ccc1)(C)C)NCc1ccccc1']


In [87]:
n = 68
rxn = df.iloc[n]['sdf_path']
out68 = rxn_breakdown(rxn)
view_initmol(out68)
# Reduce symmetry for single swaps
del out68['swappable_ligs'][1]
view_initmol(out68)

In [88]:
n = 69
rxn = df.iloc[n]['sdf_path']
out69 = rxn_breakdown(rxn)
view_initmol(out69)
# Reduce symmetry for single swaps
del out69['swappable_ligs'][0]
view_initmol(out69)

In [89]:
n = 70
rxn = df.iloc[n]['sdf_path']
out70 = rxn_breakdown(rxn)
view_initmol(out70)
# view_structures(out70['final_struct'])

print(out70['pred_lig_smis'])
# Actually correct, undersaturated carbon out there.

# ## Metal Oxs edit
# Charges should be gt than -3
out70['possible_metal_oxs'] = [[3,4]]
view_initmol(out70)


['[O-]C[C@@H]1CC(C[C@@H]1[C-](C)C)(C(=O)OC)C(=O)OC', '[Cl-]', '[cH-]1cccc1', '[cH-]1cccc1']


In [90]:
n = 71
rxn = df.iloc[n]['sdf_path']
out71 = rxn_breakdown(rxn)
view_initmol(out71)
# All good

In [91]:
n = 72
rxn = df.iloc[n]['sdf_path']
out72 = rxn_breakdown(rxn)
view_initmol(out72)

# ## Metal Oxs edit
# Charges should be gt -3
out72['possible_metal_oxs'] = [[3,4]]
view_initmol(out72)

In [92]:
n = 73
rxn = df.iloc[n]['sdf_path']
out73 = rxn_breakdown(rxn)
view_initmol(out73)
# Reduce symmetry
del out73['swappable_ligs'][1]
view_initmol(out73)

In [93]:
n = 74
rxn = df.iloc[n]['sdf_path']
out74 = rxn_breakdown(rxn)
view_initmol(out74)
# All good

In [94]:
n = 75
rxn = df.iloc[n]['sdf_path']
out75 = rxn_breakdown(rxn)
view_initmol(out75)
# All good

In [95]:
n = 76
rxn = df.iloc[n]['sdf_path']
out76 = rxn_breakdown(rxn)
view_initmol(out76)
# All good

In [96]:
n = 77
rxn = df.iloc[n]['sdf_path']
out77 = rxn_breakdown(rxn)
view_initmol(out77)
# All good

In [97]:
n = 78
rxn = df.iloc[n]['sdf_path']
out78 = rxn_breakdown(rxn)
view_initmol(out78)
# view_structures(out78['final_struct'])

print(out78['pred_lig_smis'])
# Actually correct, aromatic between Ch- group and P+ group

# Reduce symmetry for swaps
del out78['swappable_ligs'][3]
del out78['swappable_ligs'][1]
view_initmol(out78)


['c1ccc(cc1)P(c1ccccc1)c1ccccc1', 'O[C@H](C(=[CH-])[P+](C)(C)C)C#C', '[Cl-]', '[Cl-]', 'CP(C)C', 'CP(C)C']


In [98]:
n = 79
rxn = df.iloc[n]['sdf_path']
out79 = rxn_breakdown(rxn)
view_initmol(out79)
# All good

In [99]:
n = 80
rxn = df.iloc[n]['sdf_path']
out80 = rxn_breakdown(rxn)
view_initmol(out80)

print(out80['pred_lig_smis'])
# Nitrogens on non-bound ligand should be -1 (*2) ligand charge = -5

out80['pred_lig_charges'] = [-5]
view_initmol(out80)

['COCCOC', '[Cl-]', '[Cl-]', '[Cl-]', 'C[Si]1(C)CP(C[Si](C)(C)[N][Si](CP(C[Si]([N]1)(C)C)c1ccccc1)(C)C)c1ccccc1']


In [100]:
n = 81
rxn = df.iloc[n]['sdf_path']
out81 = rxn_breakdown(rxn)
view_initmol(out81)

# 1 normal Cp = -1, 1 Cp is further deprotonated, -2
out81['pred_lig_charges'] = [-3]
view_initmol(out81)

In [101]:
n = 82
rxn = df.iloc[n]['sdf_path']
out82 = rxn_breakdown(rxn)
view_initmol(out82)
# All good

In [102]:
n = 83
rxn = df.iloc[n]['sdf_path']
out83 = rxn_breakdown(rxn)
view_initmol(out83)
# All good

In [103]:
n = 84
rxn = df.iloc[n]['sdf_path']
out84 = rxn_breakdown(rxn)
view_initmol(out84)
# All good

In [104]:
n = 85
rxn = df.iloc[n]['sdf_path']
out85 = rxn_breakdown(rxn)
view_initmol(out85)
# All good

In [105]:
n = 86
rxn = df.iloc[n]['sdf_path']
out86 = rxn_breakdown(rxn)
view_initmol(out86)
# All good

In [106]:
n = 87
rxn = df.iloc[n]['sdf_path']
out87 = rxn_breakdown(rxn)
view_initmol(out87)
# All good

In [107]:
n = 88
rxn = df.iloc[n]['sdf_path']
out88 = rxn_breakdown(rxn)
view_initmol(out88)
# All good

In [108]:
n = 89
rxn = df.iloc[n]['sdf_path']
out89 = rxn_breakdown(rxn)
view_initmol(out89)
# All good

In [109]:
n = 90
rxn = df.iloc[n]['sdf_path']
out90 = rxn_breakdown(rxn)
view_initmol(out90)
view_structures(out90['final_struct'])
# All good

In [110]:
n = 91
rxn = df.iloc[n]['sdf_path']
out91 = rxn_breakdown(rxn)
view_initmol(out91)
# All good

In [111]:
n = 92
rxn = df.iloc[n]['sdf_path']
out92 = rxn_breakdown(rxn)
view_initmol(out92)
# All good

In [112]:
n = 93
rxn = df.iloc[n]['sdf_path']
out93 = rxn_breakdown(rxn)
print(convert_io_molecule(out93['init_struct']).ase_atoms.get_atomic_numbers().sum() - 1 )
view_initmol(out93)
# All good

222


In [113]:
n = 94
rxn = df.iloc[n]['sdf_path']
out94 = rxn_breakdown(rxn)
view_initmol(out94)

print(out94['pred_lig_smis'])
# 2 ligands should be -1, with -2 -> closed shell system.
print(convert_io_molecule(out94['init_struct']).ase_atoms.get_atomic_numbers().sum() - 1 )
out94['pred_lig_charges'] = [-2]
view_initmol(out94)

['CN(c1ccccc1[CH2-])C', 'C=[N]1CCCCC1', 'CCCCC=C']
196


In [114]:
n = 95
rxn = df.iloc[n]['sdf_path']
out95 = rxn_breakdown(rxn)
view_initmol(out95)
# All right

In [115]:
n = 96
rxn = df.iloc[n]['sdf_path']
out96 = rxn_breakdown(rxn)
view_initmol(out96)
# All right

In [116]:
n = 97
rxn = df.iloc[n]['sdf_path']
out97 = rxn_breakdown(rxn)
view_initmol(out97)
print(out97['pred_lig_charges'])
# All right ligands
# Metal charges beyond 6 bad
out97['possible_metal_oxs'] = [[2,3,4]]
view_initmol(out97)

[-1, 0, 0, 0]


In [117]:
n = 98
rxn = df.iloc[n]['sdf_path']
out98 = rxn_breakdown(rxn)
view_initmol(out98)
# All right

In [118]:
n = 99
rxn = df.iloc[n]['sdf_path']
out99 = rxn_breakdown(rxn)
view_initmol(out99)
# All right

In [119]:
n = 100
rxn = df.iloc[n]['sdf_path']
out100 = rxn_breakdown(rxn)
view_initmol(out100)
print(out100['pred_lig_smis'])
# All right

['[C-]#[O+]', 'Cc1cc(C)c(c(c1)C)N1C=CN([C-2]1)[CH-]c1ccccn1', 'C[c-]1c(C)c(c(c1C)C)C']


In [120]:
n = 101
rxn = df.iloc[n]['sdf_path']
out101 = rxn_breakdown(rxn)
view_initmol(out101)
# All right

In [121]:
n = 102
rxn = df.iloc[n]['sdf_path']
out102 = rxn_breakdown(rxn)
view_initmol(out102)
print(out102['pred_lig_smis'])
# All right

['[O-]C#N', 'C[N-]C(C)(C)C', 'C[N-]C(C)(C)C', 'C[N-]C(C)(C)C']


In [122]:
n = 103
rxn = df.iloc[n]['sdf_path']
out103 = rxn_breakdown(rxn)
view_initmol(out103)
print(out103['pred_lig_smis'])
# All right

['C/C(=C/C(=N/c1ccccc1)/C)/[N-]c1ccccc1', '[N-2]C', 'Fc1ccccc1']


In [123]:
n = 104
rxn = df.iloc[n]['sdf_path']
out104 = rxn_breakdown(rxn)
view_initmol(out104)
print(out104['pred_lig_smis'])
# Reduce Symmetry
del out104['swappable_ligs'][2]
view_initmol(out104)

['[SiH2-]C', 'CP(C)C', 'CP(C)C', '[N-2]c1ccccc1', '[H-]', 'CC(=O)C']


In [124]:
n = 105
rxn = df.iloc[n]['sdf_path']
out105 = rxn_breakdown(rxn)
view_initmol(out105)
print(out105['pred_lig_smis'])
# All right

['CP(Cc1cccc2c1nc1c(cccc1c2)CP(C)C)C', '[C-]#[O+]', '[H-]', '[H-]']


In [125]:
n = 106
rxn = df.iloc[n]['sdf_path']
out106 = rxn_breakdown(rxn)
view_initmol(out106)
print(out106['pred_lig_smis'])
# All right

['CN(Cc1cccc(n1)C=[P](C)C)C', '[C-]#[O+]', '[H-]', 'O']


In [126]:
n = 107
rxn = df.iloc[n]['sdf_path']
out107 = rxn_breakdown(rxn)
view_initmol(out107)
print(convert_io_molecule(out107['init_struct']).ase_atoms.get_atomic_numbers().sum())
print(out107['pred_lig_smis'])
# Reduce symmetry
del out107['swappable_ligs'][1]
view_initmol(out107)

116
['[CH-2]/C=C\\C=[CH-]', 'P', 'P']


In [127]:
n = 108
rxn = df.iloc[n]['sdf_path']
out108 = rxn_breakdown(rxn)
view_initmol(out108)
print(out108['pred_lig_smis'])
# Metal charge 0 not allowed
out108['possible_metal_oxs'] = [[2,4]]
view_initmol(out108)

['[cH-]1cccc1', '[CH-2]/C=C\\C=[CH-]']


In [128]:
n = 109
rxn = df.iloc[n]['sdf_path']
out109 = rxn_breakdown(rxn)
view_initmol(out109)
print(out109['pred_lig_smis'])
# Reduce symmetry
del out109['swappable_ligs'][1]
view_initmol(out109)

['Brc1ccc(cc1)/C=C/c1ccncc1', 'CCP(CC)CC', 'CCP(CC)CC', 'CCP(CC)CC']


In [129]:
n = 110
rxn = df.iloc[n]['sdf_path']
out110 = rxn_breakdown(rxn)
view_initmol(out110)
print(out110['pred_lig_smis'])
# Reduce symmetry
del out110['swappable_ligs'][1]
view_initmol(out110)

['Brc1ccc(cc1)C#Cc1ccncc1', 'CCP(CC)CC', 'CCP(CC)CC', 'CCP(CC)CC']


In [130]:
# Row 111 of df: break the reaction complex down and inspect both the
# initial complex and the final structure.
n = 111
rxn = df.iloc[n]['sdf_path']
out111 = rxn_breakdown(rxn)
view_initmol(out111)
print(out111['pred_lig_smis'])
view_structures(out111['final_struct'])
# Reduce symmetry: remove entries 2 and 1 from the swappable-ligand list.
# The higher index is deleted first so the lower index is not shifted.
del out111['swappable_ligs'][2]
del out111['swappable_ligs'][1]
# Everything else is all right; view again after the manual edit.
view_initmol(out111)

['C[N-]C', 'C[N-]C', 'C[N-]C', 'CC([N-]/C=N/C(C)C)C', '[CH2-]C=C']


In [131]:
# Row 112 of df (MOBH23): print the metadata row, break the reaction complex
# down and inspect it.
n = 112
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out112 = rxn_breakdown(rxn)
view_initmol(out112)
print(out112['pred_lig_smis'])

# The predicted charge of the swappable NO ligand is wrong -> it should be 0
# to match the data tag in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546

# Override the charge of the first swappable ligand.
out112['swappable_ligs'][0]['charge'] = 0

# Replace the per-ligand charge list with a single entry holding the total
# ligand charge (the list is summed later to get the total).
out112['pred_lig_charges'] = [-3]

# View again after the manual edits.
view_initmol(out112)


Reaction_name                              MOBH23
Reactant(s)                                   r23
Product(s)                                    p23
sdf_path         ../reaction_complexes/MOBH23.sdf
Name: 112, dtype: object


['C[c-]1c(C)c(c(c1C)C)C', '[CH2-]C=C(C)C', 'N#[O+]', 'c1[c-]cccc1']


In [132]:
# Row 113 of df (MOBH24): print the metadata row, break the reaction complex
# down and inspect it.
n = 113
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out113 = rxn_breakdown(rxn)
view_initmol(out113)

# This one is very tricky. It's probably supposed to be Re(II).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out113['pred_lig_smis'])

# Overwrite the candidate metal oxidation states with 2 and 4.
out113['possible_metal_oxs'] = [[2,4]]

# The total ligand charge should be -1 (the bridging CH should be negative).
# Replace the per-ligand charge list with a single entry holding that total.
out113['pred_lig_charges'] = [-1]

# View again after the manual edits.
view_initmol(out113)

Reaction_name                              MOBH24
Reactant(s)                                   r24
Product(s)                                    p24
sdf_path         ../reaction_complexes/MOBH24.sdf
Name: 113, dtype: object


['CC(P(C(C)(C)C)Cc1cccc(n1)C=[P](C(C)(C)C)C(C)(C)C)(C)C', '[C-]#[O+]', '[C-]#[O+]', 'N#Cc1ccccc1']


In [133]:
# Row 114 of df (MOBH25): print the metadata row, break the reaction complex
# down and inspect it.
n = 114
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out114 = rxn_breakdown(rxn)
view_initmol(out114)

# This one is very tricky again. It's probably supposed to be Re(II).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out114['pred_lig_smis'])

# Add 2 to the oxidation-state pool and remove 7: the pool becomes 2 and 4.
out114['possible_metal_oxs'] = [[2,4]]

# The total ligand charge should be -1 (the bridging CH should be negative).
# Replace the per-ligand charge list with a single entry holding that total.
out114['pred_lig_charges'] = [-1]

# View again after the manual edits.
view_initmol(out114)

Reaction_name                              MOBH25
Reactant(s)                                   r25
Product(s)                                    p25
sdf_path         ../reaction_complexes/MOBH25.sdf
Name: 114, dtype: object


['CC(P(C(C)(C)C)Cc1cccc(n1)C=[P](C(C)(C)C)C(C)(C)C)(C)C', '[C-]#[O+]', '[C-]#[O+]', 'N#CC(C)(C)C']


In [134]:
# Row 115 of df (MOBH26): print the metadata row, break the reaction complex
# down and inspect both the initial complex and the final structure.
n = 115
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out115 = rxn_breakdown(rxn)
view_initmol(out115)

view_structures(out115['final_struct'])
# This one is very tricky again. It's probably supposed to be Os(III).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out115['pred_lig_smis'])

# Choosing to allow a range of oxidation states (2, 3, 4) instead of one.
out115['possible_metal_oxs'] = [[2,3,4]]

# Reduce symmetry: remove entry 1 from the swappable-ligand list
# (presumably a symmetry-equivalent duplicate -- confirm in the viewer).
del out115['swappable_ligs'][1]

# View again after the manual edits.
view_initmol(out115)

Reaction_name                              MOBH26
Reactant(s)                                   r26
Product(s)                                    p26
sdf_path         ../reaction_complexes/MOBH26.sdf
Name: 115, dtype: object


['[H][H]', 'CP(C)C', 'CP(C)C', '[C-]#[O+]', '[H-]', '[H-]']


In [135]:
# Row 116 of df (MOBH27): print the metadata row, break the reaction complex
# down and inspect both the initial complex and the final structure.
n = 116
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out116 = rxn_breakdown(rxn)
view_initmol(out116)

view_structures(out116['final_struct'])

# This one is very tricky again. It's probably supposed to be Os(II).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out116['pred_lig_smis'])

# Allow a range of oxidation states (2, 3, 4) instead of a single value.
out116['possible_metal_oxs'] = [[2,3,4]]

# Reduce symmetry: remove entry 1 from the swappable-ligand list
# (presumably a symmetry-equivalent duplicate -- confirm in the viewer).
del out116['swappable_ligs'][1]

# View again after the manual edits.
view_initmol(out116)

Reaction_name                              MOBH27
Reactant(s)                                   r27
Product(s)                                    p27
sdf_path         ../reaction_complexes/MOBH27.sdf
Name: 116, dtype: object


['[NH3][BH3+]', 'CP(C)C', 'CP(C)C', '[C-]#[O+]', '[H-]', '[H-]']


In [136]:
# Row 117 of df (MOBH28): print the metadata row, break the reaction complex
# down and inspect it.
n = 117
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out117 = rxn_breakdown(rxn)
view_initmol(out117)

# This one is very tricky again. It's probably supposed to be Ir(II).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out117['pred_lig_smis'])

# Allow a range of oxidation states (2, 3, 4) instead of a single value.
out117['possible_metal_oxs'] = [[2,3,4]]

# View again after the manual edit.
view_initmol(out117)

Reaction_name                              MOBH28
Reactant(s)                                   r28
Product(s)                                    p28
sdf_path         ../reaction_complexes/MOBH28.sdf
Name: 117, dtype: object


['c1[c-]cccc1', 'CP(Cc1cccc(n1)CP(C)C)C', 'O', 'O']


In [137]:
# Row 118 of df (MOBH29): print the metadata row, break the reaction complex
# down and inspect it.
n = 118
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out118 = rxn_breakdown(rxn)
view_initmol(out118)

# This one is very tricky again. It's probably supposed to be Ir(IV).
# Trying to match what is reported in https://pubs.acs.org/doi/full/10.1021/acs.jpca.9b01546
print(out118['pred_lig_smis'])

# One of the ligands (the tridentate ligand) should carry a -1 charge.
# Replace the per-ligand charge list with a single entry holding the
# corrected total ligand charge.
out118['pred_lig_charges'] = [-3]
# View again after the manual edit.
view_initmol(out118)

Reaction_name                              MOBH29
Reactant(s)                                   r29
Product(s)                                    p29
sdf_path         ../reaction_complexes/MOBH29.sdf
Name: 118, dtype: object


['CP(Cc1cccc(n1)C=[P](C)C)C', 'c1[c-]cccc1', 'O', 'O', '[H][H]', '[H-]']


In [138]:
# Row 119 of df (MOBH30): print the metadata row, break the reaction complex
# down and inspect it.
n = 119
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out119 = rxn_breakdown(rxn)
view_initmol(out119)

# This one is very tricky again. It's probably supposed to be Pt(III),
# but that makes no sense.
print(out119['pred_lig_smis'])

# Keeping the result as-is: no manual edits.


Reaction_name                              MOBH30
Reactant(s)                               r30_r31
Product(s)                                    p30
sdf_path         ../reaction_complexes/MOBH30.sdf
Name: 119, dtype: object


['CN(CCc1cccc([c-]1)CP(C)C)C', '[CH3-]', '[H][H]']


In [139]:
# Row 120 of df (MOBH31): print the metadata row, break the reaction complex
# down and inspect it.
n = 120
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out120 = rxn_breakdown(rxn)
view_initmol(out120)

# This one is very tricky again. It's probably supposed to be Pt(III),
# but that makes no sense.
print(out120['pred_lig_smis'])

# Keeping as-is, as with 119: same reactants (r30_r31), different product.


Reaction_name                              MOBH31
Reactant(s)                               r30_r31
Product(s)                                    p31
sdf_path         ../reaction_complexes/MOBH31.sdf
Name: 120, dtype: object


['CN(CCc1cccc([c-]1)CP(C)C)C', '[CH3-]', '[H][H]']


In [140]:
# Row 121 of df (MOBH32): print the metadata row, break the reaction complex
# down and inspect it.
n = 121
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out121 = rxn_breakdown(rxn)
view_initmol(out121)

# This one is very tricky again. It's probably supposed to be Pt(IV).
print(out121['pred_lig_smis'])

# Keeping charges and oxidation states as-is.

# Reduce symmetry: remove entry 1 from the swappable-ligand list
# (presumably a symmetry-equivalent duplicate -- confirm in the viewer).
del out121['swappable_ligs'][1]
# View again after the manual edit.
view_initmol(out121)


Reaction_name                              MOBH32
Reactant(s)                               r32plus
Product(s)                                p32plus
sdf_path         ../reaction_complexes/MOBH32.sdf
Name: 121, dtype: object


['[CH-2]/C=C\\C=[CH-]', 'P', 'P']


In [141]:
# Row 122 of df (MOBH33): print the metadata row, break the reaction complex
# down and inspect both the initial complex and the final structure.
n = 122
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out122 = rxn_breakdown(rxn)
view_initmol(out122)
view_structures(out122['final_struct'])

# This one is very tricky again. It's probably supposed to be Pt(IV).
print(out122['pred_lig_smis'])

# Keeping the result as-is: no manual edits.


Reaction_name                              MOBH33
Reactant(s)                               r33plus
Product(s)                                p33plus
sdf_path         ../reaction_complexes/MOBH33.sdf
Name: 122, dtype: object


['[CH3-]', 'N', 'N', '[H-]', '[H-]']


In [142]:
# Row 123 of df (MOBH34): print the metadata row, break the reaction complex
# down and inspect it.
n = 123
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out123 = rxn_breakdown(rxn)
view_initmol(out123)

# This one is very tricky again. It's probably supposed to be Pt(III).
# Again, that makes no sense.
print(out123['pred_lig_smis'])

# Keeping the result as-is: no manual edits.


Reaction_name                              MOBH34
Reactant(s)                                   r34
Product(s)                                p34_r35
sdf_path         ../reaction_complexes/MOBH34.sdf
Name: 123, dtype: object


['c1cnn(c1)[BH+](n1cccn1)n1cccn1', '[CH3-]', '[H-]', '[H-]']


In [143]:
# Row 124 of df (MOBH35): print the metadata row, break the reaction complex
# down and inspect it.
n = 124
rxn = df.iloc[n]['sdf_path']
print(df.iloc[n])
out124 = rxn_breakdown(rxn)
view_initmol(out124)

# This one is very tricky again. It's probably supposed to be Pt(0).
# Again, that makes no sense.
print(out124['pred_lig_smis'])

# The metal oxidation state should be II (possible_metal_oxs is left untouched).

# The ligand charges should be +1 on boron and -1 on H-, i.e. 0 in total.
# Replace the per-ligand charge list with a single entry holding that total.
out124['pred_lig_charges'] = [0]

# View again after the manual edit.
view_initmol(out124)

Reaction_name                              MOBH35
Reactant(s)                               p34_r35
Product(s)                                p35+CH4
sdf_path         ../reaction_complexes/MOBH35.sdf
Name: 124, dtype: object


['c1cnn(c1)[BH+]([N+]1=[N+]=CC=C1)n1cccn1', 'C', '[H-]']


In [144]:
# LAST ONE DONE.
# Show the last index label of df to confirm it matches the last curated
# reaction (out124).
df.index.values[-1]

np.int64(124)

In [145]:
# Collect the curated per-reaction results out0 ... out124 in row order by
# looking each variable up in the notebook namespace, then tabulate them
# (one row per reaction).
outlst = [globals()[f'out{idx}'] for idx in range(125)]
newdf = pd.DataFrame(outlst)

In [146]:
# Display the assembled per-reaction table for a visual check.
newdf

Unnamed: 0,metals,metal_inds,possible_metal_oxs,init_struct,final_struct,swappable_ligs,pred_lig_charges,pred_lig_smis
0,[Cr],[2],"[[2, 3]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[C-]#[O+]', 'inds': [5, 6], 'coor...","[0, 0, 0, 0, 0, 0]","[C#[O], [C-]#[O+], [C-]#[O+], [C-]#[O+], [C-]#..."
1,[Fe],[2],"[[2, 3]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[C-]#[O+]', 'inds': [0, 1], 'coor...","[0, 0, 0, 0, 0]","[[C-]#[O+], C#[O], [C-]#[O+], [C-]#[O+], [C-]#..."
2,[Ni],[2],[[2]],charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[C-]#[O+]', 'inds': [3, 6], 'coor...","[0, 0, 0, 0]","[C#[O], [C-]#[O+], [C-]#[O+], [C-]#[O+]]"
3,[Co],[12],"[[2, 3]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,[],[-3],"[[C]#[O], [cH-]1cccc1, C#C, C#C]"
4,[Mn],[2],"[[2, 3, 4]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[C-]#[O+]', 'inds': [3, 4], 'coor...","[0, 0, 0, 0, 0, 0, -1]","[[C]#[O], [C-]#[O+], [C-]#[O+], [C-]#[O+], [C-..."
...,...,...,...,...,...,...,...,...
120,[Pt],[9],"[[2, 4]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[CH3-]', 'inds': [10, 26, 27, 28]...","[-1, -1, 0]","[CN(CCc1cccc([c-]1)CP(C)C)C, [CH3-], [H][H]]"
121,[Pt],[5],"[[2, 4]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': 'P', 'inds': [6, 16, 17, 18], 'coo...","[-3, 0, 0]","[[CH-2]/C=C\C=[CH-], P, P]"
122,[Pt],[1],"[[2, 4]]",charge=0\n RDKit 3D\n\n 0 0 0 ...,charge=0\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': 'N', 'inds': [2, 5, 13, 14], 'coor...","[-1, 0, 0, -1, -1]","[[CH3-], N, N, [H-], [H-]]"
123,[Pt],[5],"[[2, 4]]",charge=-1\n RDKit 3D\n\n 0 0 0...,charge=1\n RDKit 3D\n\n 0 0 0 ...,"[{'smiles': '[H-]', 'inds': [32], 'coordlist':...","[1, -1, -1, -1]","[c1cnn(c1)[BH+](n1cccn1)n1cccn1, [CH3-], [H-],..."


In [147]:
# Join the reaction metadata (df) with the curated breakdown results (newdf)
# column-wise. pd.concat(axis=1) aligns rows on the index and silently fills
# unmatched rows with NaN, so refuse to continue if the two tables do not
# describe exactly the same rows.
if not df.index.equals(newdf.index):
    raise ValueError(
        'df and newdf must share the same index before column-wise concat '
        f'(got {len(df)} and {len(newdf)} rows)'
    )
concat = pd.concat([df, newdf], axis=1)

In [148]:
# Total ligand charge per reaction: sum each row's 'pred_lig_charges' list.
# Rows whose list was manually replaced by a single total sum to that total.
total_lig_charges = [np.sum(charges) for charges in concat['pred_lig_charges']]
concat['total_lig_charges'] = total_lig_charges

In [149]:
# Save the combined dataframe as a pickle for the ligand-swap sampling step.
# NOTE(review): 'rnxs' in the file name looks like a typo for 'rxns'; it is left
# unchanged because downstream code may load the file by this exact name.
concat.to_pickle('../3_swap_ligands/sample_ready_rnxs.pkl')