# This notebook is meant to develop/debug generations that might be failing at a higher rate.

In [1]:
import pandas as pd
from architector import view_structures, convert_io_molecule
from swap_production import perform_swap
import numpy as np

In [2]:
# IPython introspection: the trailing `?` displays the signature and docstring of perform_swap.
perform_swap?

Signature:
perform_swap(
    swapdict,
    writeout=False,
    output_path='output_sdfs',
    call=0,
    skip_checks=False,
)
Docstring:
perform the swap

Args:
    swapdict (dict): swap dictionary
    writeout (bool, optional): write sdf file. Defaults to False.
    output_path (str, optional): where to write out. Defaults to "output_sdfs".
    call (int, optional): number of call attempt. Defaults to 0.
    skip_checks (bool, optional): return regardless of distances.

Returns:
    sdfstr : init/final functionalized sdf str.
File:      ~/software/coordcomplexsampling/ts_path_edit/4_prod

In [3]:
# Load the table of structures to generate (one row per reaction swap).
# NOTE(review): unpickling can execute arbitrary code, so only load this file from a trusted source.
gendf = pd.read_pickle('to_gen_structures_parallel.pkl')

In [4]:
# Display the first row to see which fields each generation entry carries.
gendf.iloc[0]

rxn_uid                                 MOR4_Eu2_Charge1_UHF7_swaplig_None_newlig_None
rxn_name                                                                          MOR4
reactants                                                                      ED04+CO
products                                                                          PR04
init_sdf                             charge=0\n     RDKit          3D\n\n  0  0  0 ...
final_sdf                            charge=0\n     RDKit          3D\n\n  0  0  0 ...
metal_inds                                                                        [12]
metal_syms                                                                        [Eu]
metal_oxs                                                                          [2]
total_charge                                                                        -1
total_uhf                                                                            7
swap_remove_inds                           

# Look at a particular reaction swap

In [6]:
# Select every generation entry that belongs to the MOR4 reaction.
swapdf = gendf.loc[gendf['rxn_name'] == 'MOR4']
k = 0  # positional index of the entry to inspect (0 = first row)
row_dict = swapdf.iloc[k].to_dict()
# Print the swap definition, leaving out the bulky SDF string fields.
for key, val in row_dict.items():
    if 'sdf' in key:
        continue
    print(key,':',val)
# Show the starting structure (labelinds=True presumably labels the atom indices).
view_structures(row_dict['init_sdf'], labelinds=True)
# Give perform_swap its own freshly built dict, as the original call did.
thing = perform_swap(swapdf.iloc[k].to_dict(), writeout=False)
mols = convert_io_molecule(thing)
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : MOR4_Eu2_Charge1_UHF7_swaplig_None_newlig_None
rxn_name : MOR4
reactants : ED04+CO
products : PR04
metal_inds : [12]
metal_syms : ['Eu']
metal_oxs : [np.int64(2)]
total_charge : -1
total_uhf : 7
swap_remove_inds : None
swap_functional_groups : None
swap_functionalization_inds : None
swap_functional_group_mol_inds : None
swap_bond_orders : None
swap_remove_hydrogens_when_adding : None
swap_xtb_opt : None
rxn_df_index : 3
possible_n_swaps : 53
possible_m_swaps : 53
possible_lig_swaps : 0
total_lig_charges : -3


Initial			Final


In [7]:
# Select every generation entry that belongs to the ROST20 reaction.
swapdf = gendf.loc[gendf['rxn_name'] == 'ROST20']
k = 0  # positional index of the entry to inspect (0 = first row)
row_dict = swapdf.iloc[k].to_dict()
# Print the swap definition, leaving out the bulky SDF string fields.
for key, val in row_dict.items():
    if 'sdf' in key:
        continue
    print(key,':',val)
# Show the starting structure (labelinds=True presumably labels the atom indices).
view_structures(row_dict['init_sdf'], labelinds=True)
# Give perform_swap its own freshly built dict, as the original call did.
thing = perform_swap(swapdf.iloc[k].to_dict(), writeout=False)
mols = convert_io_molecule(thing)
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : ROST20_Tc4_Charge2_UHF3_swaplig_1_newlig_CCN2
rxn_name : ROST20
reactants : m40+m41
products : m42
metal_inds : [11]
metal_syms : ['Tc']
metal_oxs : [np.int64(4)]
total_charge : 2
total_uhf : 3
swap_remove_inds : [14 16]
swap_functional_groups : ['CC#N']
swap_functionalization_inds : [[11]]
swap_functional_group_mol_inds : [[2]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 59
possible_n_swaps : 3234
possible_m_swaps : 21
possible_lig_swaps : 154
total_lig_charges : -2


Initial			Final


In [11]:
# Here is one that seems to fail.
# Select every generation entry that belongs to the ROST47 reaction.
swapdf = gendf.loc[gendf['rxn_name'] == 'ROST47']
k = 0  # positional index of the entry to inspect (0 = first row)
row_dict = swapdf.iloc[k].to_dict()
# Print the swap definition, leaving out the bulky SDF string fields.
for key, val in row_dict.items():
    if 'sdf' in key:
        continue
    print(key,':',val)
# Show the starting structure (labelinds=True presumably labels the atom indices).
view_structures(row_dict['init_sdf'], labelinds=True)
# skip_checks=True returns the result regardless of distances (per the perform_swap
# docstring), so the suspect geometry can still be viewed.
thing = perform_swap(swapdf.iloc[k].to_dict(), writeout=False, skip_checks=True)
mols = convert_io_molecule(thing)
print('Initial\t\t\tFinal')
view_structures(mols)

rxn_uid : ROST47_V5_Charge0_UHF0_swaplig_0_newlig_SCN2
rxn_name : ROST47
reactants : m114+m115
products : m116+m41+m41+m107
metal_inds : [0]
metal_syms : ['V']
metal_oxs : [np.int64(5)]
total_charge : 0
total_uhf : 0
swap_remove_inds : [17]
swap_functional_groups : ['S=C=[N-]']
swap_functionalization_inds : [[0]]
swap_functional_group_mol_inds : [[2]]
swap_bond_orders : [[1]]
swap_remove_hydrogens_when_adding : [[False]]
swap_xtb_opt : False
rxn_df_index : 80
possible_n_swaps : 258
possible_m_swaps : 3
possible_lig_swaps : 86
total_lig_charges : -5


Initial			Final
