## Targets from README.md
5. Create routine for sampling reproducibly from metal/ligand swaps. (Sample from 1-n ligands to swap, sample from metal swap)
6. Create routine for implementing a selected metal/ligand swap. Swap metal(s). Update complex spin/charge. Delete ligand atoms from selected ligands to swap - ideally, save metal-coordination locations. Add ligands + create bonds - in 3D. See where UFF takes us. Maybe do constrained XTB relaxation.
7. Check the atom indices of the newly generated reactions; they must not be altered by the swap. Check that the generated 3D structures contain no overlapping atoms.

Max number of atoms is 100. -> Done
Perform distance Sanity Checks. -> Done

In [1]:
import pandas as pd
from architector import view_structures, convert_io_molecule
from tqdm import tqdm
import numpy as np
np.random.seed(42)

In [2]:
# Candidate tables produced by the ../2_inspect_rxns step, plus the prepared reactions.
# NOTE: pd.read_pickle can execute arbitrary code while loading; only use it on trusted files.
metal_swap_df = pd.read_csv('../2_inspect_rxns/rxn_m_swap_df.csv')  # metal candidates (metal, ox, uhf)
ligand_swap_df = pd.read_pickle('../2_inspect_rxns/rxn_ligands.pkl')  # ligand candidates
rxn_df = pd.read_pickle('sample_ready_rnxs.pkl')  # one row per reaction

In [3]:
# Show the first metal candidate to recall the table's columns (metal, ox, uhf).
metal_swap_df.iloc[0]

Unnamed: 0     0
metal         La
ox             3
uhf            0
Name: 0, dtype: object

In [4]:
# Show the first ligand candidate; charge, denticity, natoms, coordList, smiles and uid are used below.
ligand_swap_df.iloc[0]

uid                                                 [Te-]c1ccccc10
smiles                                               [Te-]c1ccccc1
coordList                                                      [0]
coord_atom_symols                                               Te
coord_atom_types                                                Te
non_coord_atom_symbols                       C,C,C,C,C,C,H,H,H,H,H
non_coord_atom_types       C.ar,C.ar,C.ar,C.ar,C.ar,C.ar,H,H,H,H,H
charge                                                          -1
denticity                                                        1
metal_ox_bound                                                Cu,1
frequency                                                       59
selected_coord_type                                             Te
selected_non_coord_type                                       None
natoms                                                          12
Name: 0, dtype: object

In [5]:
# Swappable ligand sites of the first reaction: a list of dicts with
# 'smiles', 'inds', 'coordlist', 'denticity', 'metals_bound' and 'charge'.
rxn_df.iloc[0]['swappable_ligs']

[{'smiles': '[C-]#[O+]',
  'inds': array([5, 6]),
  'coordlist': [0],
  'denticity': 1,
  'metals_bound': [2],
  'charge': 0},
 {'smiles': '[C-]#[O+]',
  'inds': array([7, 8]),
  'coordlist': [0],
  'denticity': 1,
  'metals_bound': [2],
  'charge': 0}]

In [6]:
# Full record of the first reaction, before the swap-count columns are added.
rxn_df.iloc[0]

Reaction_name                                                      MOR1
Reactant(s)                                                     ED01+CO
Product(s)                                                         PR01
sdf_path                                 ../reaction_complexes/MOR1.sdf
metals                                                             [Cr]
metal_inds                                                          [2]
possible_metal_oxs                                             [[2, 3]]
init_struct           charge=0\n     RDKit          3D\n\n  0  0  0 ...
final_struct          charge=0\n     RDKit          3D\n\n  0  0  0 ...
swappable_ligs        [{'smiles': '[C-]#[O+]', 'inds': [5, 6], 'coor...
pred_lig_charges                                     [0, 0, 0, 0, 0, 0]
pred_lig_smis         [C#[O], [C-]#[O+], [C-]#[O+], [C-]#[O+], [C-]#...
total_lig_charges                                                     0
Name: 0, dtype: object

In [7]:
# Enumerate possibilities: count the metal and ligand substitutions open to each reaction.
possible_n_swaps, possible_m_swaps, possibe_lig_swaps = [], [], []
for i, rxn in rxn_df.iterrows():
    # Metal centres combine multiplicatively; each centre accepts every
    # candidate whose oxidation state is in that centre's allowed list.
    n_metal_swaps = 1
    for allowed_oxs in rxn['possible_metal_oxs']:
        n_metal_swaps *= sum(
            len(metal_swap_df[metal_swap_df.ox == ox]) for ox in allowed_oxs
        )
    # Ligand candidates (same charge and denticity) are summed over the swappable sites.
    n_lig_swaps = sum(
        len(ligand_swap_df[(ligand_swap_df.charge == site['charge'])
                           & (ligand_swap_df.denticity == site['denticity'])])
        for site in rxn['swappable_ligs']
    )
    # Without any ligand candidate the reaction still keeps its metal-only swaps.
    nswaps = n_metal_swaps * n_lig_swaps if n_lig_swaps > 0 else n_metal_swaps
    possible_n_swaps.append(nswaps)
    possible_m_swaps.append(n_metal_swaps)
    possibe_lig_swaps.append(n_lig_swaps)
total_count = sum(possible_n_swaps)
rxn_df['possible_n_swaps'] = possible_n_swaps
rxn_df['possible_m_swaps'] = possible_m_swaps
rxn_df['possible_lig_swaps'] = possibe_lig_swaps
print(total_count)

1149169


In [8]:
# Same record again, now carrying the possible_n_swaps / possible_m_swaps / possible_lig_swaps columns.
rxn_df.iloc[0]

Reaction_name                                                      MOR1
Reactant(s)                                                     ED01+CO
Product(s)                                                         PR01
sdf_path                                 ../reaction_complexes/MOR1.sdf
metals                                                             [Cr]
metal_inds                                                          [2]
possible_metal_oxs                                             [[2, 3]]
init_struct           charge=0\n     RDKit          3D\n\n  0  0  0 ...
final_struct          charge=0\n     RDKit          3D\n\n  0  0  0 ...
swappable_ligs        [{'smiles': '[C-]#[O+]', 'inds': [5, 6], 'coor...
pred_lig_charges                                     [0, 0, 0, 0, 0, 0]
pred_lig_smis         [C#[O], [C-]#[O+], [C-]#[O+], [C-]#[O+], [C-]#...
total_lig_charges                                                     0
possible_n_swaps                                                

In [9]:
# Reactions with zero ligand-swap candidates (metal-only swaps):
# print how many there are, then show the total number of swaps they contribute.
no_lig_swap_rxns = rxn_df[rxn_df.possible_lig_swaps == 0]
print(no_lig_swap_rxns.shape)
no_lig_swap_rxns.possible_n_swaps.sum()

(31, 16)


np.int64(1436)

In [10]:
# How many reactions fall below the per-reaction cutoff of 200000/125 = 1600 swaps.
# NOTE(review): presumably a 200,000-structure budget divided over ~125 reactions - confirm.
rxn_df[rxn_df.possible_n_swaps < 200000/125].shape

(48, 16)

In [11]:
# Split the reactions at a single cutoff: below it goes to enumerate_df, at or above it to sample_df.
swap_cutoff = 200000/125
enumerate_df = rxn_df[rxn_df.possible_n_swaps < swap_cutoff]
sample_df = rxn_df[rxn_df.possible_n_swaps >= swap_cutoff]

In [12]:
# Total number of possible swaps across the reactions in enumerate_df
enumerate_df.possible_n_swaps.sum()

np.int64(16936)

In [13]:
# Example of a reaction with two metal centres (two entries in metals / metal_inds / possible_metal_oxs).
rxn_df.iloc[21]

Reaction_name                                                     MOR22
Reactant(s)                                                     ED22+I2
Product(s)                                                         PR22
sdf_path                                ../reaction_complexes/MOR22.sdf
metals                                                         [Ir, Ir]
metal_inds                                                       [2, 3]
possible_metal_oxs                                           [[3], [3]]
init_struct           charge=0\n     RDKit          3D\n\n  0  0  0 ...
final_struct          charge=0\n     RDKit          3D\n\n  0  0  0 ...
swappable_ligs        [{'smiles': '[S-]C(C)(C)C', 'inds': [4, 8, 10,...
pred_lig_charges                                [0, -1, -1, 0, 0, 0, 0]
pred_lig_smis         [II, [S-]C(C)(C)C, [S-]C(C)(C)C, [C-]#[O+], [C...
total_lig_charges                                                    -2
possible_n_swaps                                                

In [14]:
def _sanitize_swap_uid(parts):
    """Join ``parts`` with '_' and keep only letters, digits, spaces and underscores."""
    joined = '_'.join(parts)
    # NOTE(review): stripping brackets/charges means two ligand uids that differ only
    # in such characters map to the same id, and the later one is skipped as "done".
    return "".join(c for c in joined if c.isalpha() or c.isdigit() or c==' ' or c=='_').rstrip()


def _build_swap_record(rxn, uid, metal_inds, metal_syms, metal_oxs,
                       total_charge, total_uhf, lig_swap=None):
    """Assemble one output row; ``lig_swap`` is None for a metal-only swap.

    ``lig_swap`` is a tuple ``(remove_inds, smiles, metals_bound, coord_list)``.
    Key order is fixed because it defines the column order of the returned frame.
    """
    record = {
        'rxn_uid':uid,
        'rxn_name':rxn['Reaction_name'],
        'reactants':rxn['Reactant(s)'],
        'products':rxn['Product(s)'],
        'init_sdf':rxn['init_struct'],
        'final_sdf':rxn['final_struct'],
        'metal_inds':list(metal_inds),
        'metal_syms':list(metal_syms),
        'metal_oxs':list(metal_oxs),
        'total_charge':total_charge,
        'total_uhf':total_uhf,
    }
    if lig_swap is None:
        record.update({
            'swap_remove_inds':None,
            'swap_functional_groups':None,
            'swap_functionalization_inds':None,
            'swap_functional_group_mol_inds':None,
            'swap_bond_orders':None,
            'swap_remove_hydrogens_when_adding':None,
            'swap_xtb_opt':None,
        })
    else:
        remove_inds, smiles, metals_bound, coord_list = lig_swap
        record.update({
            'swap_remove_inds':remove_inds,
            'swap_functional_groups':[smiles],
            'swap_functionalization_inds':[list(metals_bound)],
            'swap_functional_group_mol_inds':[coord_list],
            'swap_bond_orders':[[1 for _ in coord_list]],
            'swap_remove_hydrogens_when_adding':[[False for _ in coord_list]],
            'swap_xtb_opt':False,
        })
    return record


def enumerate_all_rxns(rxn, max_natoms=100):
    """Enumerate every metal swap combined with every single-site ligand swap for one reaction.

    Reads the notebook-level tables ``metal_swap_df`` (columns ``metal``, ``ox``,
    ``uhf``) and ``ligand_swap_df`` (columns ``uid``, ``smiles``, ``coordList``,
    ``charge``, ``denticity``, ``natoms``).

    Parameters
    ----------
    rxn : mapping (e.g. a row of ``rxn_df``)
        Must provide ``Reaction_name``, ``Reactant(s)``, ``Product(s)``,
        ``init_struct``, ``final_struct``, ``metal_inds``, ``possible_metal_oxs``,
        ``swappable_ligs``, ``pred_lig_charges`` and ``possible_n_swaps``.
        ``rxn`` and the dicts inside ``swappable_ligs`` are not modified.
    max_natoms : int, optional
        Ligand swaps that would push the initial structure above this atom
        count are skipped, by default 100.

    Returns
    -------
    pandas.DataFrame
        One row per accepted swap, in enumeration order (oxidation states,
        then metals, then ligand site, then ligand candidate).

    Raises
    ------
    ValueError
        If ``possible_metal_oxs`` has fewer entries than ``metal_inds``.
    """
    import itertools  # local import keeps this cell runnable on its own

    print('Possible',rxn['possible_n_swaps'])

    metal_inds = list(rxn['metal_inds'])
    ox_lists = list(rxn['possible_metal_oxs'])[:len(metal_inds)]
    if len(ox_lists) != len(metal_inds):
        raise ValueError('possible_metal_oxs has fewer entries than metal_inds: '
                         + str(rxn['Reaction_name']))
    lig_charge_sum = np.sum(rxn['pred_lig_charges'])
    natoms_init = len(convert_io_molecule(rxn['init_struct']).ase_atoms)

    # Loop-invariant look-ups: metal candidates per oxidation state ...
    metal_options = {}
    for oxs in ox_lists:
        for ox in oxs:
            if ox not in metal_options:
                metal_options[ox] = metal_swap_df[metal_swap_df.ox == ox].to_dict('records')

    # ... and ligand candidates (same charge and denticity) per swappable site.
    swappable = rxn['swappable_ligs']
    site_bound = []
    site_ligands = []
    for swap in swappable:
        dent = swap['denticity']
        bound = swap['metals_bound']
        # A multidentate site listed against a single metal binds that metal with every donor atom.
        # Built as a new list so the caller's dict is left untouched.
        if (len(bound) < dent) and (len(bound) == 1):
            bound = [bound[0] for _ in range(dent)]
        site_bound.append(bound)
        same_kind = (ligand_swap_df.charge == swap['charge']) & (ligand_swap_df.denticity == dent)
        site_ligands.append(ligand_swap_df[same_kind].to_dict('records'))
    # Matches the counting cell: with no ligand candidate at all, only metal swaps are generated.
    ligand_swaps_available = any(len(cands) > 0 for cands in site_ligands)

    swap_dicts = []
    done_uuids = set()  # set membership keeps the duplicate check O(1)
    with tqdm(total=rxn['possible_n_swaps']) as pbar:
        for ox_combo in itertools.product(*ox_lists):
            candidates_per_metal = [metal_options[ox] for ox in ox_combo]
            for metal_combo in itertools.product(*candidates_per_metal):
                metal_syms = [m['metal'] for m in metal_combo]
                total_charge = sum(m['ox'] for m in metal_combo) + lig_charge_sum
                total_uhf = sum(m['uhf'] for m in metal_combo)
                uid_head = [
                    rxn['Reaction_name'],
                    ''.join(sym + str(ox) for sym, ox in zip(metal_syms, ox_combo)),
                    'Charge'+str(total_charge),
                    'UHF'+str(total_uhf),
                    'swaplig',
                ]

                if not ligand_swaps_available:
                    uuid = _sanitize_swap_uid(uid_head + ['None', 'newlig', 'None'])
                    if uuid not in done_uuids:
                        done_uuids.add(uuid)
                        swap_dicts.append(_build_swap_record(
                            rxn, uuid, metal_inds, metal_syms, ox_combo,
                            total_charge, total_uhf))
                    pbar.update(1)
                    continue

                for j, swap in enumerate(swappable):
                    bound = site_bound[j]
                    for lig in site_ligands[j]:
                        pbar.update(1)
                        # Atom count of the initial structure after the exchange.
                        temp_natoms = natoms_init - len(swap['inds']) + lig['natoms']
                        uuid = _sanitize_swap_uid(uid_head + [str(j), 'newlig', lig['uid']])
                        # Skip already-generated ids and structures that would be too large.
                        if (uuid in done_uuids) or (temp_natoms > max_natoms):
                            continue
                        coord_list = lig['coordList']
                        # A site bound at more positions than the new ligand has donor atoms
                        # (e.g. bridging two metals) reuses the first donor atom for each position.
                        if len(coord_list) < len(bound):
                            coord_list = [coord_list[0] for _ in range(len(bound))]
                        done_uuids.add(uuid)
                        swap_dicts.append(_build_swap_record(
                            rxn, uuid, metal_inds, metal_syms, ox_combo,
                            total_charge, total_uhf,
                            lig_swap=(swap['inds'], lig['smiles'], bound, coord_list)))
    print('Number populated',len(swap_dicts))
    return pd.DataFrame(swap_dicts)

In [15]:
# Small throw-away molecule built from the string 'CCCC', used to try out the sanity-check methods below.
mol = convert_io_molecule('CCCC')


In [16]:
# IPython help lookup for the distance sanity check (exploration only; not needed for the pipeline).
mol.dist_sanity_checks?

[0;31mSignature:[0m
[0mmol[0m[0;34m.[0m[0mdist_sanity_checks[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0msmallest_dist_cutoff[0m[0;34m=[0m[0;36m0.55[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmin_dist_cutoff[0m[0;34m=[0m[0;36m3[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mparams[0m[0;34m=[0m[0;34m{[0m[0;34m}[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpair_cutoffs[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0massembly[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcovrad_metal[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdebug[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
dist_sanity_checks
Perform basic distance-based sanity checks on structure

Parameters
-------
atoms : ase.Atoms
    atoms to check for sanity.
params : dict, optional
    parameters for dictionary, default {}.
pair_cutoffs : dic

In [17]:
# IPython help lookup for the graph sanity check (exploration only; not needed for the pipeline).
mol.graph_sanity_checks?

[0;31mSignature:[0m [0mmol[0m[0;34m.[0m[0mgraph_sanity_checks[0m[0;34m([0m[0mfactor[0m[0;34m=[0m[0;36m1.45[0m[0;34m,[0m [0mparams[0m[0;34m=[0m[0;34m{[0m[0;34m}[0m[0;34m,[0m [0massembly[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
graph_sanity_checks
Check if any part of the molecule is blown up relative to the imposed molecular graph

Parameters
----------
factor : float, optional
    tolerance for long bonds - factor*(sum of covalent radii), by default 1.4
params : dict, optional
    parameters from inputDict, default {}
assembly : bool, optional
    if this is an assembly check or final check, default False -> final cutoffs.

Returns
-------
sane : bool
    If the graph distances are longer than the factor -> indicating relatively garbage
    geometry.
[0;31mFile:[0m      ~/software/Architector/architector/io_molecule.py
[0;31mType:[0m      method

In [18]:
# Run both checks on the test molecule, then display the resulting dists_sane flag.
mol.dist_sanity_checks(smallest_dist_cutoff=0.7) # Check that atoms are not overlapping
mol.graph_sanity_checks()  # per its docstring: checks for parts blown up relative to the molecular graph
mol.dists_sane

True

In [19]:
def _build_swapped_pair(swapdict):
    """Build the metal- and ligand-swapped initial and final structures for one swap record."""
    init_mol = convert_io_molecule(swapdict['init_sdf'])
    init_mol.charge = 0
    init_mol.uhf = 0
    final_mol = convert_io_molecule(swapdict['final_sdf'])
    final_mol.charge = 0
    final_mol.uhf = 0
    # NOTE(review): swapdict['total_charge'] / swapdict['total_uhf'] are never applied to
    # the molecules; both are written with charge = uhf = 0 - confirm this is intended.
    # Swap metal(s): update both the element symbol and the atom type at each metal index.
    for i, metind in enumerate(swapdict['metal_inds']):
        new_metal = swapdict['metal_syms'][i]
        for mol in (init_mol, final_mol):
            mol.ase_atoms[metind].symbol = new_metal
            mol.atom_types[metind] = new_metal
    # Functionalize 3D: the same ligand exchange is applied to both structures.
    if swapdict['swap_remove_inds'] is not None:
        for mol in (init_mol, final_mol):
            mol.functionalize_3D(
                functional_groups=swapdict['swap_functional_groups'],
                functionalization_inds=swapdict['swap_functionalization_inds'],
                functional_group_mol_inds=swapdict['swap_functional_group_mol_inds'],
                remove_inds=swapdict['swap_remove_inds'],
                bond_orders=swapdict['swap_bond_orders'],
                remove_hydrogens_when_adding=swapdict['swap_remove_hydrogens_when_adding'],
                xtb_opt=swapdict['swap_xtb_opt']
            )
    return init_mol, final_mol


def _structure_is_sane(mol):
    """Run the distance and graph sanity checks and return the molecule's ``dists_sane`` flag."""
    mol.dist_sanity_checks(smallest_dist_cutoff=0.7) # Check that atoms are not overlapping
    mol.graph_sanity_checks()
    return mol.dists_sane


def perform_swap(swapdict, writeout=False, call=0, max_call=11, check_final=False):
    """Apply one metal/ligand swap to a reaction and return or write the two structures.

    The structures are rebuilt from scratch on every attempt until the sanity
    checks pass (presumably ``functionalize_3D`` is non-deterministic, so a
    retry can succeed - confirm).

    Parameters
    ----------
    swapdict : mapping
        One swap record with the keys written by ``enumerate_all_rxns``
        (``init_sdf``, ``final_sdf``, ``metal_inds``, ``metal_syms``,
        ``rxn_uid`` and the ``swap_*`` entries).
    writeout : bool, optional
        If True, write ``<rxn_uid>.sdf`` to the working directory and return
        ``'Done'``; otherwise print the file name and return the SDF text.
    call : int, optional
        Index of the first attempt, by default 0.
    max_call : int, optional
        Index of the last attempt, by default 11. With ``call=0`` this gives
        12 attempts, although the error message reads "10X".
    check_final : bool, optional
        Also require the final structure to pass the sanity checks. By default
        False, i.e. only the initial structure is validated.

    Returns
    -------
    str
        ``'Done'`` when ``writeout`` is True, otherwise the initial SDF text
        followed by the final SDF text.

    Raises
    ------
    ValueError
        If no attempt produced a structure that passes the checks; the failing
        structure is passed to ``view_structures`` first.
    """
    attempt = call
    while True:
        init_mol, final_mol = _build_swapped_pair(swapdict)
        failed_mol = None
        if not _structure_is_sane(init_mol):
            failed_mol = init_mol
        elif check_final and not _structure_is_sane(final_mol):
            failed_mol = final_mol
        if failed_mol is None:
            break
        if attempt >= max_call:
            view_structures(failed_mol)
            raise ValueError('Overlapping Atoms Produced 10X Do not try again: '+swapdict['rxn_uid'])
        attempt += 1

    init_sdf = init_mol.write_sdf('init',writestring=True)
    final_sdf = final_mol.write_sdf('final',writestring=True)
    outstr = init_sdf + final_sdf
    outname = swapdict['rxn_uid'] + '.sdf'
    if writeout:
        with open(outname,'w') as file1:
            file1.write(outstr)
        return 'Done'
    print(outname)
    return outstr

In [20]:
# Generate one swap table per reaction, for every row of rxn_df (in row order).
# Each call prints its own 'Possible' / 'Number populated' lines and a progress bar.
swap_dfs = []
for i,row in rxn_df.iterrows():
    swapdf = enumerate_all_rxns(row)  # DataFrame of all accepted swaps for this reaction
    swap_dfs.append(swapdf)

Possible 8162


100%|██████████| 8162/8162 [00:00<00:00, 15277.69it/s]


Number populated 8162
Possible 8162


100%|██████████| 8162/8162 [00:00<00:00, 14515.28it/s]


Number populated 8162
Possible 1617


100%|██████████| 1617/1617 [00:00<00:00, 16551.82it/s]


Number populated 1617
Possible 53


100%|██████████| 53/53 [00:00<00:00, 21940.20it/s]


Number populated 53
Possible 16170


100%|██████████| 16170/16170 [00:01<00:00, 10563.10it/s]


Number populated 16170
Possible 8162


100%|██████████| 8162/8162 [00:00<00:00, 14506.48it/s]


Number populated 8162
Possible 3927


100%|██████████| 3927/3927 [00:00<00:00, 20093.83it/s]


Number populated 3757
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 13178.89it/s]


Number populated 10780
Possible 5216


100%|██████████| 5216/5216 [00:00<00:00, 19279.40it/s]


Number populated 4832
Possible 2464


100%|██████████| 2464/2464 [00:00<00:00, 22090.53it/s]


Number populated 2464
Possible 7527


100%|██████████| 7527/7527 [00:00<00:00, 14372.35it/s]


Number populated 7527
Possible 8162


100%|██████████| 8162/8162 [00:00<00:00, 14994.42it/s]


Number populated 8162
Possible 672


100%|██████████| 672/672 [00:00<00:00, 20465.37it/s]


Number populated 672
Possible 504


100%|██████████| 504/504 [00:00<00:00, 20480.44it/s]


Number populated 504
Possible 21


100%|██████████| 21/21 [00:00<00:00, 21561.91it/s]


Number populated 21
Possible 3111


100%|██████████| 3111/3111 [00:00<00:00, 20128.25it/s]


Number populated 2669
Possible 7987


100%|██████████| 7987/7987 [00:00<00:00, 14332.47it/s]


Number populated 7742
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 12958.76it/s]


Number populated 11760
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 12264.83it/s]


Number populated 11760
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 13105.21it/s]


Number populated 11760
Possible 3104


100%|██████████| 3104/3104 [00:00<00:00, 20012.11it/s]


Number populated 3104
Possible 166912


100%|██████████| 166912/166912 [01:46<00:00, 1567.17it/s]


Number populated 166912
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 12586.13it/s]


Number populated 11760
Possible 11760


100%|██████████| 11760/11760 [00:00<00:00, 13091.55it/s]


Number populated 11760
Possible 8526


100%|██████████| 8526/8526 [00:00<00:00, 22350.26it/s]


Number populated 3528
Possible 1836


100%|██████████| 1836/1836 [00:00<00:00, 29561.28it/s]


Number populated 187
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 17376.75it/s]


Number populated 3626
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 9819.87it/s] 


Number populated 3822
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 20122.43it/s]


Number populated 3724
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 19334.45it/s]


Number populated 3268
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 20998.64it/s]


Number populated 2926
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 21749.39it/s]


Number populated 2774
Possible 7546


100%|██████████| 7546/7546 [00:00<00:00, 15748.03it/s]


Number populated 7546
Possible 7546


100%|██████████| 7546/7546 [00:00<00:00, 14835.79it/s]


Number populated 7546
Possible 7546


100%|██████████| 7546/7546 [00:00<00:00, 15835.01it/s]


Number populated 7546
Possible 21


100%|██████████| 21/21 [00:00<00:00, 17441.66it/s]


Number populated 21
Possible 21


100%|██████████| 21/21 [00:00<00:00, 17722.41it/s]


Number populated 21
Possible 1309


100%|██████████| 1309/1309 [00:00<00:00, 22833.90it/s]


Number populated 1309
Possible 1309


100%|██████████| 1309/1309 [00:00<00:00, 23799.66it/s]


Number populated 1309
Possible 78848


100%|██████████| 78848/78848 [00:24<00:00, 3167.06it/s] 


Number populated 78848
Possible 385280


100%|██████████| 385280/385280 [09:34<00:00, 671.11it/s] 


Number populated 385280
Possible 12201


100%|██████████| 12201/12201 [00:00<00:00, 13346.48it/s]


Number populated 10927
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 16860.15it/s]


Number populated 6020
Possible 49


100%|██████████| 49/49 [00:00<00:00, 21549.85it/s]


Number populated 49
Possible 70


100%|██████████| 70/70 [00:00<00:00, 17473.15it/s]


Number populated 70
Possible 53


100%|██████████| 53/53 [00:00<00:00, 23071.94it/s]


Number populated 53
Possible 1113


100%|██████████| 1113/1113 [00:00<00:00, 17905.95it/s]


Number populated 1113
Possible 53


100%|██████████| 53/53 [00:00<00:00, 24302.84it/s]


Number populated 53
Possible 684


100%|██████████| 684/684 [00:00<00:00, 9677.82it/s]


Number populated 684
Possible 57


100%|██████████| 57/57 [00:00<00:00, 21947.61it/s]


Number populated 57
Possible 57


100%|██████████| 57/57 [00:00<00:00, 16724.40it/s]


Number populated 57
Possible 57


100%|██████████| 57/57 [00:00<00:00, 18993.83it/s]


Number populated 57
Possible 57


100%|██████████| 57/57 [00:00<00:00, 18866.42it/s]


Number populated 57
Possible 53


100%|██████████| 53/53 [00:00<00:00, 19565.05it/s]


Number populated 53
Possible 5194


100%|██████████| 5194/5194 [00:00<00:00, 16770.64it/s]


Number populated 5194
Possible 13680


100%|██████████| 13680/13680 [00:01<00:00, 11889.06it/s]


Number populated 13395
Possible 13680


100%|██████████| 13680/13680 [00:01<00:00, 12509.02it/s]


Number populated 12597
Possible 6278


100%|██████████| 6278/6278 [00:00<00:00, 15699.98it/s]


Number populated 6278
Possible 57


100%|██████████| 57/57 [00:00<00:00, 21771.73it/s]


Number populated 57
Possible 3234


100%|██████████| 3234/3234 [00:00<00:00, 20301.31it/s]


Number populated 3234
Possible 1309


100%|██████████| 1309/1309 [00:00<00:00, 23166.50it/s]


Number populated 1309
Possible 1309


100%|██████████| 1309/1309 [00:00<00:00, 23137.01it/s]


Number populated 1309
Possible 651


100%|██████████| 651/651 [00:00<00:00, 19282.05it/s]


Number populated 651
Possible 651


100%|██████████| 651/651 [00:00<00:00, 19303.86it/s]


Number populated 651
Possible 6270


100%|██████████| 6270/6270 [00:00<00:00, 24638.58it/s]


Number populated 2109
Possible 49


100%|██████████| 49/49 [00:00<00:00, 23136.43it/s]


Number populated 49
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 16937.62it/s]


Number populated 6020
Possible 70


100%|██████████| 70/70 [00:00<00:00, 21479.35it/s]


Number populated 70
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 16345.07it/s]


Number populated 6020
Possible 3612


100%|██████████| 3612/3612 [00:00<00:00, 19831.87it/s]


Number populated 3612
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 18766.92it/s]


Number populated 4116
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 17015.98it/s]


Number populated 6020
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 19183.69it/s]


Number populated 4214
Possible 4214


100%|██████████| 4214/4214 [00:00<00:00, 19390.87it/s]


Number populated 4214
Possible 21


100%|██████████| 21/21 [00:00<00:00, 20041.04it/s]


Number populated 21
Possible 73


100%|██████████| 73/73 [00:00<00:00, 21719.81it/s]


Number populated 73
Possible 1533


100%|██████████| 1533/1533 [00:00<00:00, 18519.21it/s]


Number populated 1533
Possible 1533


100%|██████████| 1533/1533 [00:00<00:00, 13986.06it/s]


Number populated 1387
Possible 2771


100%|██████████| 2771/2771 [00:00<00:00, 22801.88it/s]


Number populated 2193
Possible 17


100%|██████████| 17/17 [00:00<00:00, 17816.88it/s]


Number populated 17
Possible 258


100%|██████████| 258/258 [00:00<00:00, 26953.53it/s]


Number populated 84
Possible 70


100%|██████████| 70/70 [00:00<00:00, 19455.39it/s]


Number populated 70
Possible 70


100%|██████████| 70/70 [00:00<00:00, 19476.04it/s]


Number populated 70
Possible 70


100%|██████████| 70/70 [00:00<00:00, 21374.58it/s]


Number populated 70
Possible 70


100%|██████████| 70/70 [00:00<00:00, 22088.57it/s]


Number populated 70
Possible 5504


100%|██████████| 5504/5504 [00:00<00:00, 17057.99it/s]


Number populated 5504
Possible 1462


100%|██████████| 1462/1462 [00:00<00:00, 24576.76it/s]


Number populated 1156
Possible 39


100%|██████████| 39/39 [00:00<00:00, 20843.25it/s]


Number populated 39
Possible 10


100%|██████████| 10/10 [00:00<00:00, 16871.70it/s]


Number populated 10
Possible 32


100%|██████████| 32/32 [00:00<00:00, 21711.05it/s]


Number populated 32
Possible 20629


100%|██████████| 20629/20629 [00:02<00:00, 9699.54it/s] 


Number populated 19257
Possible 32


100%|██████████| 32/32 [00:00<00:00, 19778.62it/s]


Number populated 32
Possible 32


100%|██████████| 32/32 [00:00<00:00, 23305.74it/s]


Number populated 32
Possible 32


100%|██████████| 32/32 [00:00<00:00, 20886.67it/s]


Number populated 32
Possible 32


100%|██████████| 32/32 [00:00<00:00, 22362.17it/s]


Number populated 32
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 16974.15it/s]


Number populated 6020
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 6384.02it/s]


Number populated 6020
Possible 10780


100%|██████████| 10780/10780 [00:00<00:00, 12648.28it/s]


Number populated 10780
Possible 4081


100%|██████████| 4081/4081 [00:00<00:00, 16915.93it/s]


Number populated 4081
Possible 4081


100%|██████████| 4081/4081 [00:00<00:00, 18919.78it/s]


Number populated 4081
Possible 4081


100%|██████████| 4081/4081 [00:00<00:00, 19248.00it/s]


Number populated 4081
Possible 4081


100%|██████████| 4081/4081 [00:00<00:00, 19345.61it/s]


Number populated 4081
Possible 5160


100%|██████████| 5160/5160 [00:00<00:00, 17760.28it/s]


Number populated 5160
Possible 33


100%|██████████| 33/33 [00:00<00:00, 13905.17it/s]


Number populated 33
Possible 3654


100%|██████████| 3654/3654 [00:00<00:00, 19428.22it/s]


Number populated 3654
Possible 7987


100%|██████████| 7987/7987 [00:00<00:00, 15474.09it/s]


Number populated 7987
Possible 7987


100%|██████████| 7987/7987 [00:00<00:00, 14285.87it/s]


Number populated 7987
Possible 2464


100%|██████████| 2464/2464 [00:00<00:00, 21146.90it/s]


Number populated 2464
Possible 38


100%|██████████| 38/38 [00:00<00:00, 20136.90it/s]


Number populated 38
Possible 3003


100%|██████████| 3003/3003 [00:00<00:00, 19123.84it/s]


Number populated 2769
Possible 3003


100%|██████████| 3003/3003 [00:00<00:00, 20113.34it/s]


Number populated 2808
Possible 258


100%|██████████| 258/258 [00:00<00:00, 22625.93it/s]


Number populated 258
Possible 1617


100%|██████████| 1617/1617 [00:00<00:00, 22425.87it/s]


Number populated 1617
Possible 5852


100%|██████████| 5852/5852 [00:00<00:00, 20256.13it/s]


Number populated 3952
Possible 5852


100%|██████████| 5852/5852 [00:00<00:00, 21417.64it/s]


Number populated 3116
Possible 22820


100%|██████████| 22820/22820 [00:02<00:00, 8333.76it/s] 


Number populated 22820
Possible 16800


100%|██████████| 16800/16800 [00:01<00:00, 9811.73it/s] 


Number populated 16800
Possible 6020


100%|██████████| 6020/6020 [00:00<00:00, 16712.51it/s]


Number populated 6020
Possible 8428


100%|██████████| 8428/8428 [00:00<00:00, 14526.93it/s]


Number populated 8428
Possible 912


100%|██████████| 912/912 [00:00<00:00, 12216.22it/s]


Number populated 912
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 19767.62it/s]


Number populated 3268
Possible 2926


100%|██████████| 2926/2926 [00:00<00:00, 20423.25it/s]


Number populated 2926
Possible 9120


100%|██████████| 9120/9120 [00:00<00:00, 14330.64it/s]


Number populated 9120
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 19167.71it/s]


Number populated 3268
Possible 3268


100%|██████████| 3268/3268 [00:00<00:00, 19011.60it/s]

Number populated 3268





In [21]:
# Add more metadata
# Each enumerated-swap frame is tagged with the position of its parent
# reaction in rxn_df, and the reaction-level swap bookkeeping values are
# copied onto every row. The frames in swap_dfs are modified in place.
meta_dfs = []
for i, df in enumerate(swap_dfs):
    rxn = rxn_df.iloc[i]
    n = len(df)
    # Values are repeated n times so that each row receives its own entry.
    df['rxn_df_index'] = [i] * n
    for meta_col in ('possible_n_swaps',
                     'possible_m_swaps',
                     'possible_lig_swaps',
                     'total_lig_charges'):
        df[meta_col] = [rxn[meta_col]] * n
    meta_dfs.append(df)
# Stack all per-reaction frames; the original row labels are kept as-is.
combined_df = pd.concat(meta_dfs)

In [22]:
# Save enumerated reactions.
# Writes combined_df (every enumerated swap row plus the reaction-level
# metadata columns) to a pickle file; the path is relative, so it is
# resolved against the kernel's current working directory.
combined_df.to_pickle('enumerated_rxns.pkl')

In [23]:
# Development -> Testing
# Enumerate the swaps for a single reaction (positional row 10 of rxn_df)
# as a quick check. This rebinds `swapdf`, which the following cells reuse.
swapdf = enumerate_all_rxns(rxn_df.iloc[10])

Possible 7527


100%|██████████| 7527/7527 [00:00<00:00, 14290.08it/s]

Number populated 7527





In [24]:
# Look at rxn
# Display the first enumerated swap as a plain dict. The record carries the
# full 'init_sdf' structure text, so the rendered output is long.
swapdf.iloc[0].to_dict()

{'rxn_uid': 'MOR11_Pd0_Charge2_UHF0_swaplig_0_newlig_c1cnc2cc1ccc1cccnc21212',
 'rxn_name': 'MOR11',
 'reactants': 'ED11+C2H6',
 'products': 'PR11',
 'init_sdf': 'charge=0\n     RDKit          3D\n\n  0  0  0  0  0  0  0  0  0  0999 V3000\nM  V30 BEGIN CTAB\nM  V30 COUNTS 34 35 0 0 1\nM  V30 BEGIN ATOM\nM  V30 1 C -0.947173 -0.209042 0.280017 0\nM  V30 2 C 0.584528 -0.209036 0.280017 0\nM  V30 3 H -1.343705 -1.230228 0.280017 0\nM  V30 4 H -1.343731 0.301555 -0.604348 0\nM  V30 5 H -1.343732 0.301554 1.164382 0\nM  V30 6 H 0.981086 -0.566025 1.236767 0\nM  V30 7 H 0.981088 -0.859121 -0.507510 0\nM  V30 8 H 0.981061 0.798032 0.110795 0\nM  V30 9 C -3.581762 -0.853845 7.137737 0\nM  V30 10 C -4.313837 -1.484199 8.137328 0\nM  V30 11 C -3.664760 -1.854865 9.313996 0\nM  V30 12 C -2.308524 -1.581813 9.447842 0\nM  V30 13 H -4.024025 -0.538020 6.195705 0\nM  V30 14 H -5.371029 -1.677251 7.988997 0\nM  V30 15 H -4.204457 -2.348937 10.116013 0\nM  V30 16 H -1.785823 -1.862248 10.355073 0\nM  

In [25]:
# Pick one reaction (positional row n of rxn_df) and one of its enumerated
# swaps (positional row k of the resulting frame).
n = 0
swapdf = enumerate_all_rxns(rxn_df.iloc[n])
k = 106
swap_record = swapdf.iloc[k].to_dict()
# Print every field except the ones whose key contains 'sdf'.
for key, val in swap_record.items():
    if 'sdf' in key:
        continue
    print(key,':',val)
# Show the starting structure with atom indices labelled.
view_structures(swap_record['init_sdf'],labelinds=True)
# Apply the selected swap, convert the result, and view it.
# NOTE(review): writeout=False presumably suppresses writing the result to
# disk -- confirm against perform_swap.
thing = perform_swap(swap_record, writeout=False)
mols = convert_io_molecule(thing)
view_structures(mols,labelinds=True)

Possible 8162


100%|██████████| 8162/8162 [00:00<00:00, 15009.77it/s]

Number populated 8162
rxn_uid : MOR1_Eu2_Charge2_UHF7_swaplig_1_newlig_c1ccccc1Sbc1ccccc1c1ccccc16
rxn_name : MOR1
reactants : ED01+CO
products : PR01
metal_inds : [2]
metal_syms : ['Eu']
metal_oxs : [2]
total_charge : 2
total_uhf : 7
swap_remove_inds : [7 8]
swap_functional_groups : ['c1ccc(cc1)[Sb](c1ccccc1)c1ccccc1']
swap_functionalization_inds : [[2]]
swap_functional_group_mol_inds : [[6]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False





MOR1_Eu2_Charge2_UHF7_swaplig_1_newlig_c1ccccc1Sbc1ccccc1c1ccccc16.sdf
