# This notebook is meant to develop/debug generations that might be failing at a higher rate.

In [1]:
import pandas as pd
from architector import view_structures, convert_io_molecule
from swap_production import perform_swap
import numpy as np

In [2]:
perform_swap?

[0;31mSignature:[0m
[0mperform_swap[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mswapdict[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mwriteout[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moutput_path[0m[0;34m=[0m[0;34m'output_sdfs'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcall[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mskip_checks[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
perform the swap

Args:
    swapdict (dict): swap dictionary
    writeout (bool, optional): write sdf file. Defaults to False.
    output_path (str, optional): where to write out. Defaults to "output_sdfs".
    call (int, optional): number of call attempt. Defaults to 0.
    skip_checks (bool, optional): return regardless of distances.

Returns:
    sdfstr : init/final functionalized sdf str.
[0;31mFile:[0m      ~/software/coordcomplexsampling/ts_path_edit/4_prod

In [3]:
# Load the pickled DataFrame whose rows are inspected in the rest of this notebook.
# NOTE(review): unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source.
gendf = pd.read_pickle('to_gen_structures_parallel.pkl')

In [4]:
# Peek at the first row to see which fields each entry carries.
gendf.iloc[0]

rxn_uid                                 MOR4_Eu2_Charge1_UHF7_swaplig_None_newlig_None
rxn_name                                                                          MOR4
reactants                                                                      ED04+CO
products                                                                          PR04
init_sdf                             charge=0\n     RDKit          3D\n\n  0  0  0 ...
final_sdf                            charge=0\n     RDKit          3D\n\n  0  0  0 ...
metal_inds                                                                        [12]
metal_syms                                                                        [Eu]
metal_oxs                                                                          [2]
total_charge                                                                        -1
total_uhf                                                                            7
swap_remove_inds                           

# Look at a particular reaction swap

In [5]:
# Inspect one MOR4 entry: print its metadata, view the stored initial
# structure, run the swap, then view the structures that come back.
swapdf = gendf[gendf.rxn_name == 'MOR4']
# Positional row within the filtered frame (0 = first row)
k = 0
# Convert the row to a dict once and reuse it for printing, viewing and the
# swap call, instead of rebuilding it with .to_dict() for every use.
swapdict = swapdf.iloc[k].to_dict()
for key, val in swapdict.items():
    if 'sdf' not in key:  # skip the bulky SDF text fields
        print(key, ':', val)
view_structures(swapdict['init_sdf'], labelinds=True)
thing = perform_swap(swapdict, writeout=False)
# First returned item is printed as-is (an .sdf file name in the recorded
# output); the second is converted into the structures viewed below.
print(thing[0])
mols = convert_io_molecule(thing[1])
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : MOR4_Eu2_Charge1_UHF7_swaplig_None_newlig_None
rxn_name : MOR4
reactants : ED04+CO
products : PR04
metal_inds : [12]
metal_syms : ['Eu']
metal_oxs : [np.int64(2)]
total_charge : -1
total_uhf : 7
swap_remove_inds : None
swap_functional_groups : None
swap_functionalization_inds : None
swap_functional_group_mol_inds : None
swap_bond_orders : None
swap_remove_hydrogens_when_adding : None
swap_xtb_opt : None
rxn_df_index : 3
possible_n_swaps : 53
possible_m_swaps : 53
possible_lig_swaps : 0
total_lig_charges : -3


MOR4_Eu2_Charge1_UHF7_swaplig_None_newlig_None_rev.sdf
Initial			Final


In [6]:
# Inspect one ROST20 entry: print its metadata, view the stored initial
# structure, run the swap, then view the structures that come back.
swapdf = gendf[gendf.rxn_name == 'ROST20']
# Positional row within the filtered frame (0 = first row)
k = 0
# Convert the row to a dict once and reuse it for printing, viewing and the
# swap call, instead of rebuilding it with .to_dict() for every use.
swapdict = swapdf.iloc[k].to_dict()
for key, val in swapdict.items():
    if 'sdf' not in key:  # skip the bulky SDF text fields
        print(key, ':', val)
view_structures(swapdict['init_sdf'], labelinds=True)
thing = perform_swap(swapdict, writeout=False)
# First returned item is printed as-is (an .sdf file name in the recorded
# output); the second is converted into the structures viewed below.
print(thing[0])
mols = convert_io_molecule(thing[1])
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : ROST20_Tc4_Charge2_UHF3_swaplig_1_newlig_CCN2
rxn_name : ROST20
reactants : m40+m41
products : m42
metal_inds : [11]
metal_syms : ['Tc']
metal_oxs : [np.int64(4)]
total_charge : 2
total_uhf : 3
swap_remove_inds : [14 16]
swap_functional_groups : ['CC#N']
swap_functionalization_inds : [[11]]
swap_functional_group_mol_inds : [[2]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 59
possible_n_swaps : 3234
possible_m_swaps : 21
possible_lig_swaps : 154
total_lig_charges : -2


ROST20_Tc4_Charge2_UHF3_swaplig_1_newlig_CCN2.sdf
Initial			Final


In [7]:
# Same ROST20 row as the previous cell, run a second time. In the recorded
# outputs the name returned by perform_swap differs between the two runs
# (with and without a `_rev` suffix), so the result is not identical from
# call to call.
swapdf = gendf[gendf.rxn_name == 'ROST20']
# Positional row within the filtered frame (0 = first row)
k = 0
# Convert the row to a dict once and reuse it for printing, viewing and the
# swap call, instead of rebuilding it with .to_dict() for every use.
swapdict = swapdf.iloc[k].to_dict()
for key, val in swapdict.items():
    if 'sdf' not in key:  # skip the bulky SDF text fields
        print(key, ':', val)
view_structures(swapdict['init_sdf'], labelinds=True)
thing = perform_swap(swapdict, writeout=False)
print(thing[0])
mols = convert_io_molecule(thing[1])
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : ROST20_Tc4_Charge2_UHF3_swaplig_1_newlig_CCN2
rxn_name : ROST20
reactants : m40+m41
products : m42
metal_inds : [11]
metal_syms : ['Tc']
metal_oxs : [np.int64(4)]
total_charge : 2
total_uhf : 3
swap_remove_inds : [14 16]
swap_functional_groups : ['CC#N']
swap_functionalization_inds : [[11]]
swap_functional_group_mol_inds : [[2]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 59
possible_n_swaps : 3234
possible_m_swaps : 21
possible_lig_swaps : 154
total_lig_charges : -2


ROST20_Tc4_Charge2_UHF3_swaplig_1_newlig_CCN2_rev.sdf
Initial			Final


In [16]:
# ROST47 is a reaction whose generation seems to fail; inspect one entry.
swapdf = gendf[gendf['rxn_name'] == 'ROST47']
k = 0  # positional row within the filtered frame (first row)
swapdict = swapdf.iloc[k].to_dict()
# Print every field except the bulky SDF text blocks.
for field in swapdict:
    if 'sdf' in field:
        continue
    print(field, ':', swapdict[field])
# Stored initial structure, with atom indices labelled.
view_structures(swapdict['init_sdf'], labelinds=True)
# Run the swap without writing anything to disk.
thing = perform_swap(swapdict, writeout=False)
print(thing[0])
mols = convert_io_molecule(thing[1])
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : ROST47_V5_Charge0_UHF0_swaplig_0_newlig_SCN2
rxn_name : ROST47
reactants : m114+m115
products : m116+m41+m41+m107
metal_inds : [0]
metal_syms : ['V']
metal_oxs : [np.int64(5)]
total_charge : 0
total_uhf : 0
swap_remove_inds : [17]
swap_functional_groups : ['S=C=[N-]']
swap_functionalization_inds : [[0]]
swap_functional_group_mol_inds : [[2]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 80
possible_n_swaps : 258
possible_m_swaps : 3
possible_lig_swaps : 86
total_lig_charges : -5


ROST47_V5_Charge0_UHF0_swaplig_0_newlig_SCN2_rev.sdf
Initial			Final


In [19]:
# Compare the metal-centre geometry of the regenerated structures with the
# stored originals for the swap inspected above.

# When the returned name contains 'rev', mols[1] is treated as the initial
# structure and mols[0] as the final one; otherwise the order is init, final.
if 'rev' in thing[0]:
    init_ind = 1
    fin_ind = 0
else:
    init_ind = 0
    fin_ind = 1

# Loss assigned when the regenerated structure's classification does not list
# the original geometry at all (e.g. the coordination changed). Indexing the
# classification dict directly raised KeyError in that case.
MISSING_GEO_LOSS = 100

# Final structure: loss of the original geometry label, before vs. after.
og_final = convert_io_molecule(swapdict['final_sdf'])
og_final_dict = og_final.classify_metal_geo_type(return_result=True)[0]  # first metal centre
original_final_geo = og_final_dict['metal_geo_type']
original_final_loss = og_final_dict['classification_dict'][original_final_geo]
new_final_classification = mols[fin_ind].classify_metal_geo_type(return_result=True)[0]['classification_dict']
newloss_final = new_final_classification.get(original_final_geo, MISSING_GEO_LOSS)
print('Final Structure Loss:',np.abs(original_final_loss-newloss_final))

# Initial structure: same comparison.
og_init = convert_io_molecule(swapdict['init_sdf'])
og_init_dict = og_init.classify_metal_geo_type(return_result=True)[0]  # first metal centre
original_init_geo = og_init_dict['metal_geo_type']
original_init_loss = og_init_dict['classification_dict'][original_init_geo]
new_init_classification = mols[init_ind].classify_metal_geo_type(return_result=True)[0]['classification_dict']
newloss_init = new_init_classification.get(original_init_geo, MISSING_GEO_LOSS)
print('Init Structure Loss:',np.abs(original_init_loss-newloss_init))

Final Structure Loss: 2.2720264978947924
Init Structure Loss: 17.87827642211476


In [18]:
# Show the geometry classification result computed for the original final structure.
og_final_dict

[{'metal': 'Nb',
  'metal_ind': 0,
  'metal_geo_type': 'trigonal_bipyramidal',
  'mae_angle_loss': np.float64(10.547459087855707),
  'confidence': np.float64(0.27448335102318616),
  'classification_dict': {'trigonal_bipyramidal': np.float64(10.547459087855707),
   'square_pyramidal': np.float64(14.537859472598793),
   'pentagonal_planar': np.float64(18.355710669636743)}}]

In [12]:
mols[0].classify_metal_geo_type?

[0;31mSignature:[0m [0mmols[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m.[0m[0mclassify_metal_geo_type[0m[0;34m([0m[0mreturn_result[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
classify_metal_geo_type calculate the actual geometry of the metal centers

Parameters
----------
return_results: bool, optional
    return the results, by default False

Returns
-------
metal_center_geos : list, optional
    metal center geometries present in the mol2string.
[0;31mFile:[0m      ~/software/Architector/architector/io_molecule.py
[0;31mType:[0m      method

In [29]:
# MOR38 is another reaction whose generation seems to fail; inspect one entry.
swapdf = gendf[gendf['rxn_name'] == 'MOR38']
k = 1  # positional row within the filtered frame (second row)
swapdict = swapdf.iloc[k].to_dict()
# Print every field except the bulky SDF text blocks.
for field in swapdict:
    if 'sdf' in field:
        continue
    print(field, ':', swapdict[field])
# Stored initial structure, with atom indices labelled.
view_structures(swapdict['init_sdf'], labelinds=True)
# Run the swap without writing anything to disk.
thing = perform_swap(swapdict, writeout=False)
print(thing[0])
mols = convert_io_molecule(thing[1])
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : MOR38_Ce4_Charge4_UHF0_swaplig_0_newlig_N1CCNC13
rxn_name : MOR38
reactants : ED37+PMe3+PMe3+PMe3
products : PR38+Bz
metal_inds : [39]
metal_syms : ['Ce']
metal_oxs : [4]
total_charge : 4
total_uhf : 0
swap_remove_inds : [40 41]
swap_functional_groups : ['N1C=CN=C1']
swap_functionalization_inds : [[39]]
swap_functional_group_mol_inds : [[3]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 38
possible_n_swaps : 1309
possible_m_swaps : 17
possible_lig_swaps : 77
total_lig_charges : 0


MOR38_Ce4_Charge4_UHF0_swaplig_0_newlig_N1CCNC13_rev.sdf
Initial			Final


In [30]:
# Compare the metal-centre geometry of the regenerated structures with the
# stored originals for the swap inspected above.

# A returned name containing 'rev' means mols[1] is treated as the initial
# structure and mols[0] as the final one; otherwise the order is init, final.
init_ind, fin_ind = (1, 0) if 'rev' in thing[0] else (0, 1)

# Final structure: loss of the original geometry label, before vs. after.
og_final = convert_io_molecule(swapdict['final_sdf'])
og_final_dict = og_final.classify_metal_geo_type(return_result=True)[0]
original_final_geo = og_final_dict['metal_geo_type']
original_final_loss = og_final_dict['classification_dict'][original_final_geo]
regenerated_final = mols[fin_ind].classify_metal_geo_type(return_result=True)[0]
# Falls back to 100 when the original geometry is not among the candidates.
newloss_final = regenerated_final['classification_dict'].get(original_final_geo, 100)
final_delta = np.abs(original_final_loss - newloss_final)
print('Final Structure Loss:', final_delta)

# Initial structure: same comparison.
og_init = convert_io_molecule(swapdict['init_sdf'])
og_init_dict = og_init.classify_metal_geo_type(return_result=True)[0]
original_init_geo = og_init_dict['metal_geo_type']
original_init_loss = og_init_dict['classification_dict'][original_init_geo]
regenerated_init = mols[init_ind].classify_metal_geo_type(return_result=True)[0]
# Falls back to 100 when the original geometry is not among the candidates.
newloss_init = regenerated_init['classification_dict'].get(original_init_geo, 100)
init_delta = np.abs(original_init_loss - newloss_init)
print('Init Structure Loss:', init_delta)

Final Structure Loss: 0.13023149289825398
Init Structure Loss: 8.678673012960516


In [25]:
# Per-geometry losses for the regenerated final structure; check whether
# original_final_geo appears among the keys.
mols[fin_ind].classify_metal_geo_type(return_result=True)[0]['classification_dict']

{'trigonal_prismatic': np.float64(9.062943089555398),
 'pentagonal_pyramidal': np.float64(10.026162574182733),
 'hexagonal_planar': np.float64(20.70442561020906),
 'octahedral': np.float64(23.897738152326188)}

In [26]:
# Geometry label assigned to the original final structure.
original_final_geo

'trigonal_bipyramidal'

In [27]:
# Per-geometry losses for the regenerated initial structure; check whether
# original_init_geo appears among the keys.
mols[init_ind].classify_metal_geo_type(return_result=True)[0]['classification_dict']

{'tri_tri_mer_capped': np.float64(21.654748914390577),
 'capped_square_antiprismatic': np.float64(21.81258839033633),
 'cn9_YICLED': np.float64(22.80524297827886),
 'am_c3_9H2O_c0': np.float64(23.097016012543406),
 'tricapped_trigonal_prismatic': np.float64(23.66334785811956)}

In [28]:
# Geometry label assigned to the original initial structure.
original_init_geo

'pentagonal_planar'