## Here we have code to create molecules given ligands

In [3]:
import sqlite3
import json
import copy
import numpy as np

from ase import io, Atoms
from ase.build import molecule

from collections import Counter, defaultdict
from scipy.spatial import distance_matrix
from itertools import combinations
from openbabel.pybel import readfile
from sympy import Plane, Point3D
from openbabel import openbabel
from math import pi ,sin, cos, sqrt, acos
import pickle
import optuna
from tqdm.notebook import tqdm
con = sqlite3.connect('ligands.db')
cur = con.cursor()

cur.execute('''CREATE TABLE IF NOT EXISTS ligand_data
              (ccdc_id text, denticity integer, coordinating_elements text, coordinating_indices text,
               original_metal integer, xyz text)'''
          )

<sqlite3.Cursor at 0x7f92dcb2d030>

## Helper mathematical functions for the rotations and translations to be done when adding ligands for generic coordinates

In [12]:

def dotproduct(v1, v2):
    return sum((a*b) for a, b in zip(v1, v2))

def length(v):
    return sqrt(dotproduct(v, v))

def angle(v1, v2):
    try:
        ang = acos(dotproduct(v1, v2) / (length(v1) * length(v2)))
    except ValueError:
        v2[2] = v2[2]*0.95
        ang = acos(dotproduct(v1, v2) / (length(v1) * length(v2)))
    return ang

def R(theta, u):
    return [[cos(theta) + (u[0]**2) * (1-cos(theta)), 
             u[0] * u[1] * (1-cos(theta)) - u[2] * sin(theta), 
             u[0] * u[2] * (1 - cos(theta)) + u[1] * sin(theta)],
            [u[0] * u[1] * (1-cos(theta)) + u[2] * sin(theta),
             cos(theta) + (u[1]**2) * (1-cos(theta)),
             u[1] * u[2] * (1 - cos(theta)) - u[0] * sin(theta)],
            [u[0] * u[2] * (1-cos(theta)) - u[1] * sin(theta),
             u[1] * u[2] * (1-cos(theta)) + u[0] * sin(theta),
             cos(theta) + (u[2]**2) * (1-cos(theta))]]

def check_planarity(ligand_connect_pos, ligand_connect_indices):
    plane = Plane(Point3D(list(ligand_positions[ligand_connect_indices[0]])), 
                  Point3D(list(ligand_positions[ligand_connect_indices[1]])), 
                  Point3D(list(ligand_positions[ligand_connect_indices[2]])))
    point2plane = []
    for idx, point in enumerate(ligand_positions):
        if idx in ligand_connect_indices:
            continue
        dist = plane.distance(Point3D(point))
        point2plane.append(float(dist))
    return point2plane

def Rotate(pointsToRotate, point, theta):
    """
    adapted from: https://stackoverflow.com/questions/17763655/rotation-of-a-point-in-3d-about-an-arbitrary-axis-using-python
    needs to be origin
    rotates given:
    pointsToRotate: the atoms to rotate around the axis
    point: the coordinates to define one of the axes (eg. the metal).
    Requires the connecting point to be at the origin.
    theta: the angle to rotate through
    """
    diff = -point
    # make it unit norm
    u = diff / np.linalg.norm(diff)
    r = R(theta, u)
    rotated = []
    orig_shape = pointsToRotate.shape
    for point in pointsToRotate:
        for i in range(3):
            rotated.append(sum([r[j][i] * point[j] for j in range(3)]))
    
    return list(np.array(rotated).reshape(orig_shape))

def get_axial(metal_coord, tetra_coords):
    """
    Gets the axial positions of an assumed tetradentate arrangement of atoms, 
    does this by finding the vector which is at ~ right angles to the other
    vectors defined by (atom position - metal position).
    metal_coord: Coordinates of the metal center
    tetra_coords: coordinates of the metals connecting to the atom
    """
    assert len(tetra_coords)==4
    metal2connection = np.zeros(shape=(4, 3))
    metaldirections = []
    for idx, tetra_coord in enumerate(tetra_coords):
        metal2connection[idx] = -(metal_coord-tetra_coord)
        metaldirections.append(metal_coord-tetra_coord)
    connection_coords = []
    angles_between = []
    perps = []
    for coord in combinations(metaldirections, 2):
        angle_between = angle(coord[0], coord[1])
        angles_between.append(angle_between)
        connection_coords.append(coord[0]-coord[1])
        if 2.7<angle_between:
            #assume found the direction which defines the opposite
            #atoms which connect to the metal.
            perps.append(coord[0]-coord[1])
    if len(perps)!=2:
        return None
    assert len(perps)==2
    axial = np.cross(perps[0], perps[1])
    axial_vec = np.cross(perps[0], perps[1])
    axial_vec_rel = axial_vec-metal_coord
    orig_length = np.linalg.norm(axial_vec_rel)
    desired_length = np.mean([length(x) for x in metaldirections])
    scale = desired_length/np.linalg.norm(axial_vec_rel)
    axial_vec_rel*=scale
    axial_back = axial_vec_rel+metal_coord
    return axial_back

def add_ligand(structure, connecting_atom_coord, ligand2add):
    # add the tmqm ligand
    positions = structure.positions
    # translate the original ligand xyz to the new position
    # define the direction at which we will add the atom
    
    ligand_conn_pos = connecting_atom_coord
    ligand_positions = ligand2add.positions
    ligand_atoms = ligand2add.numbers
    # connecting point must be the first atom in the Atoms object.
    translate_lig = ligand_positions[0]-ligand_conn_pos
    lig_pos = ligand_positions-translate_lig
    # add the new ligand
    pos_w_ligand = np.append(positions, lig_pos, 0)
    # make connecting point the origin
    pos_w_ligand = pos_w_ligand-lig_pos[0]
    atomic_nums = np.append(structure.numbers, ligand_atoms)
    struct_w_ligand = Atoms(atomic_nums, pos_w_ligand)
    return struct_w_ligand

def max_dist_objective(trial, positions, first_index, last_index, stationary_coords, axes):
    # we maximise this metric such that the ligands are far from eachother
    angle1 = trial.suggest_float('ang1', 0, 3.14)
    angle2 = trial.suggest_float('ang2', 0, 3.14)
    angle3 = trial.suggest_float('ang3', 0, 3.14)

    positions[first_index:last_index] = Rotate(positions[first_index:last_index], axes[0], angle1)
    positions[first_index:last_index] = Rotate(positions[first_index:last_index], axes[1], angle2)
    positions[first_index:last_index] = Rotate(positions[first_index:last_index], axes[2], angle3)
    dist_mat = distance_matrix(stationary_coords, positions[first_index:last_index]).flatten()
    
    mean_dist = np.mean(dist_mat)
    min_dist = np.min(dist_mat)
    return mean_dist+min_dist

def optimise_ligand_position(struct_w_ligand, ccdc_id, 
                             first_lig_index, last_lig_index, savename, 
                             add_min=False, min_indices=None):
    positions = struct_w_ligand.positions
    assert last_lig_index-first_lig_index>1
    # so that if the ligand is small, we don't iterate over as many orientations
    if 7<(last_lig_index-first_lig_index):
        space_between_angs = 0.15
    elif 1<last_lig_index-first_lig_index<7:
        space_between_angs = 0.6
        
    angle_range = np.arange(0, 3.14, space_between_angs)
    # define other axes, metal is first atom.
    other_ax = np.array([positions[0][1], -positions[0][0], positions[0][2]])
    final_ax = np.array([positions[0][0], positions[0][2], -positions[0][1]])
    # first axis is defined as the position of the metal, since we set the 
    # connecting atom as the origin
    axes_def = (positions[0], other_ax, final_ax)
    mean_dists = [0]
    min_dists = [0]
    if last_lig_index==len(positions):
        unmoving_coords = positions[:first_lig_index]
    else:
        unmoving_coords = np.append(positions[:first_lig_index], positions[-(len(positions)-last_lig_index):], 0)
    
    for angles in combinations(angle_range, 3):
        pos2rotate = positions
        pos2rotate[first_lig_index:last_lig_index] = Rotate(pos2rotate[first_lig_index:last_lig_index],
                                                            axes_def[0], angles[0])
        pos2rotate[first_lig_index:last_lig_index] = Rotate(pos2rotate[first_lig_index:last_lig_index],
                                                            axes_def[1], angles[1])
        pos2rotate[first_lig_index:last_lig_index] = Rotate(pos2rotate[first_lig_index:last_lig_index], 
                                                            axes_def[2], angles[2])
        dist_mat = distance_matrix(unmoving_coords, pos2rotate[first_lig_index:last_lig_index]).flatten()
        if 7<(last_lig_index-first_lig_index):
            mean_dist = np.mean(dist_mat)
            min_dist = np.min(dist_mat)
            bool2check = (0.5*mean_dist/max(mean_dists)+min_dist/max(min_dists))>1.5
            mean_dists.append(mean_dist)
            min_dists.append(min_dist)

        elif 1<last_lig_index-first_lig_index<7:
            min_dist = np.min(dist_mat)
            bool2check = min_dist>max(min_dists)
            min_dists.append(min_dist)
        if bool2check:
            best_pos = copy.copy(pos2rotate)
            thing = Atoms(struct_w_ligand.numbers, positions=pos2rotate)
    if add_min:
        assert len(min_indices)==2
        
        dist_mat = distance_matrix(best_pos[:min_indices[0]], 
                                   best_pos[min_indices[0]:min_indices[1]]).flatten()
        min_dist = np.min(dist_mat)
        thing.write("oh_func_catalysts/save_as_opt_{}_{}_{}.xyz".format(ccdc_id, savename, round(min_dist, 1)))
    else:
        pass

    return thing


def add2tridentate_trans(tri_ligand_xyz_str, connecting_indices, tmqm_id, ligand2add_1, ligand2add_2, describer):
    """
    Adds a ligand to a tridentate ligand, aiming to add it in the same plane
    as the atoms that connect to the metal.
    Args:
    tri_ligand_xyz_str: A string of the xyz of the separated tridentate ligand.
    connecting_indices: The indices of atoms in the ligand that connect to the metal.
    tmqm_id: The id stored in the ccdc
    ligand2add_1/2: Ase Atoms object, with the 0th index referring to the connecting point.
    describer: A string to add description for the catalyst to be written to file
    """
    with open("tmp__.xyz", "w") as f:
        f.write(tri_ligand_xyz_str)
    ligand = io.read("tmp__.xyz")
    connecting_positions = np.zeros(shape=(3, 3))
    for idx, conn_idx in enumerate(connecting_indices):
        connecting_positions[idx] = ligand.positions[conn_idx]
    # define the vectors that make up the tridentate triangle
    vec1 = connecting_positions[0]-connecting_positions[1]
    vec2 = connecting_positions[1]-connecting_positions[2]
    vec3 = connecting_positions[0]-connecting_positions[2]
    # need mapping to determine which vertex of the triangle
    # is the point at which we have the highest angle.
    # The key is the index of the angles list, the value is the vertex index,
    # eg. vec1 and vec2 shre the index 1, so that is the intersection point.
    angle_idx2_vertex_idx = {0: 1, 1: 0, 2: 2}
    angles = [angle(-vec1, vec2), angle(vec1, vec3), angle(vec2, vec3)]
    sum_angles = sum(angles)
    opposite_atom = connecting_positions[angle_idx2_vertex_idx[np.argmax(angles)]]
    assert abs(sum_angles-3.141)<0.001

    tmqm_xyz = io.read("all_relevant_xyzs/{}.xyz".format(tmqm_id))
    metal_coord = tmqm_xyz.positions[np.argmax(tmqm_xyz.numbers)]
    
    # sanity check the position of atoms
    connecting_distances = set(distance_matrix(connecting_positions, connecting_positions).flatten())
    # remove the 0 value since atoms are obviously 0 Angstroms from themselves
    connecting_distances.remove(0)
    min_dist = min(connecting_distances)
    sum_distances = 0
    for dist in metal_coord-connecting_positions:
        sum_distances+=(np.linalg.norm(dist))
    avg_dist = sum_distances/3
    if avg_dist>min_dist:
        #  The connecting atoms are in some cases closer to eachother than the metal
        return 
    
    # add the tmqm ligand
    positions = np.append([metal_coord], ligand.positions, 0)
    atomic_nums = [26]+list(ligand.numbers)
    struct2addto = Atoms(atomic_nums, positions)
    metal2opposite = metal_coord-opposite_atom
    opposite_atom = opposite_atom+2*metal2opposite
    intermediate_atoms = Atoms(symbols=[8, 1], 
                               positions=[opposite_atom, opposite_atom+[0.8, 0, 0]])
    structure = add_ligand(struct2addto, opposite_atom,  intermediate_atoms)
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(intermediate_atoms), len(structure),
                                         "")

    translate_lig = ligand2add_1.positions[0]-opposite_atom
    lig_pos = ligand2add_1.positions-translate_lig
    tetra_pos = list(list(connecting_positions)-lig_pos[0])
    tetra_pos = [list(x) for x in tetra_pos]
    tetra_pos.append([0, 0, 0])
    coord = get_axial(structure.positions[0], tetra_pos)
    if coord is None:
        return

    structure = add_ligand(structure, coord, ligand2add_1)
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_1), len(structure),
                                         "")
    new_pos = 2*structure.positions[0]
    structure = add_ligand(structure, new_pos, ligand2add_2)
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_2), len(structure),
                                         "")
    structure.positions = structure.positions-structure.positions[-len(ligand2add_1)-len(ligand2add_2)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_1)-len(ligand2add_2),
                                         len(structure)-len(ligand2add_2),
                                         "{}_before_trans".format(describer), add_min=True)
    structure.positions = structure.positions-structure.positions[-len(ligand2add_2)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_2), len(structure),
                                         "{}_final_trans".format(describer), add_min=True)


def add2tridentate(tri_ligand_xyz_str, connecting_indices, tmqm_id, ligand2add_1, ligand2add_2, describer):
    """
    Adds a ligand to a tridentate ligand, aiming to add it in the same plane
    as the atoms that connect to the metal.
    Args:
    tri_ligand_xyz_str: A string of the xyz of the separated tridentate ligand.
    connecting_indices: The indices of atoms in the ligand that connect to the metal.
    tmqm_id: The id stored in the ccdc
    ligand2add: Ase Atoms object, with the 0th index referring to the connecting point.
    """
    with open("tmp__.xyz", "w") as f:
        f.write(tri_ligand_xyz_str)
    ligand = io.read("tmp__.xyz")
    connecting_positions = np.zeros(shape=(3, 3))
    for idx, conn_idx in enumerate(connecting_indices):
        connecting_positions[idx] = ligand.positions[conn_idx]
    # define the vectors that make up the tridentate triangle
    vec1 = connecting_positions[0]-connecting_positions[1]
    vec2 = connecting_positions[1]-connecting_positions[2]
    vec3 = connecting_positions[0]-connecting_positions[2]
    # need mapping to determine which vertex of the triangle
    # is the point at which we have the highest angle.
    # The key is the index of the angles list, the value is the vertex index,
    # eg. vec1 and vec2 shre the index 1, so that is the intersection point.
    angle_idx2_vertex_idx = {0: 1, 1: 0, 2: 2}
    angles = [angle(-vec1, vec2), angle(vec1, vec3), angle(vec2, vec3)]
    sum_angles = sum(angles)
    opposite_atom = connecting_positions[angle_idx2_vertex_idx[np.argmax(angles)]]
    assert abs(sum_angles-3.141)<0.001

    tmqm_xyz = io.read("all_relevant_xyzs/{}.xyz".format(tmqm_id))
    metal_coord = tmqm_xyz.positions[np.argmax(tmqm_xyz.numbers)]
    
    # sanity check the position of atoms
    connecting_distances = set(distance_matrix(connecting_positions, connecting_positions).flatten())
    # remove the 0 value since atoms are obviously 0 Angstroms from themselves
    connecting_distances.remove(0)
    min_dist = min(connecting_distances)
    sum_distances = 0
    for dist in metal_coord-connecting_positions:
        sum_distances+=(np.linalg.norm(dist))
    avg_dist = sum_distances/3
    if avg_dist>min_dist:
        #  The connecting atoms are in some cases closer to eachother than the metal
        return 
    
    # add the tmqm ligand
    positions = np.append([metal_coord], ligand.positions, 0)
    atomic_nums = [26]+list(ligand.numbers)
    struct2addto = Atoms(atomic_nums, positions)
    metal2opposite = metal_coord-opposite_atom
    opposite_atom = opposite_atom+2*metal2opposite
    
    
    structure = add_ligand(struct2addto, opposite_atom, ligand2add_1)

    structure = optimise_ligand_position(structure, tmqm_id, len(structure)-len(ligand2add_1), len(structure),
                                        "")
    translate_lig = ligand2add_2.positions[0]-opposite_atom
    lig_pos = ligand2add_2.positions-translate_lig
    tetra_pos = list(list(connecting_positions)-lig_pos[0])
    tetra_pos = [list(x) for x in tetra_pos]
    tetra_pos.append([0, 0, 0])
    coord = get_axial(structure.positions[0], tetra_pos)
    if coord is None:
        return

    structure = add_ligand(structure, coord, ligand2add_2)
    metal_pos = structure.positions[0]
    # add OH
    intermediate_atoms = Atoms(symbols=[8, 1], 
                               positions=[2*metal_pos, 2*metal_pos+[0.8, 0, 0]])
    structure = add_ligand(structure, 2*metal_pos, intermediate_atoms)
    structure.positions = structure.positions-structure.positions[-len(intermediate_atoms)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(intermediate_atoms), len(structure),
                                         "")
    #structure = append(intermediate_atoms)
    structure.positions = structure.positions-structure.positions[-len(ligand2add_2)-len(intermediate_atoms)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_2)-len(intermediate_atoms), len(structure)-len(intermediate_atoms),
                                         "")
    # now repeat
    structure.positions = structure.positions-structure.positions[-len(ligand2add_1)-len(ligand2add_2)-len(intermediate_atoms)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_1)-len(ligand2add_2)-len(intermediate_atoms), len(structure)-len(ligand2add_2)-len(intermediate_atoms),
                                        "")
    structure.positions = structure.positions-structure.positions[-len(ligand2add_2)-len(intermediate_atoms)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(ligand2add_2)-len(intermediate_atoms), len(structure)-len(intermediate_atoms),
                                         "{}_final".format(describer), 
                                         add_min=True, 
                                         min_indices=(len(structure)-len(ligand2add_1)-len(ligand2add_2)-len(intermediate_atoms), len(structure)))


    
def add2tetra(tetra_ligand_xyz_str, connecting_indices, tmqm_id, ligand2add, describer):
    """
    Adds a ligand to a tetradentate ligand, aiming to add it in the same plane
    as the atoms that connect to the metal.
    Args:
    tetra_ligand_xyz_str: A string of the xyz of the separated tetradentate ligand.
    connecting_indices: The indices of atoms in the ligand that connect to the metal.
    tmqm_id: The id stored in the ccdc
    ligand2add: Ase Atoms object, with the 0th index referring to the connecting point.
    """
    with open("tmp__.xyz", "w") as f:
        f.write(tetra_ligand_xyz_str)
    ligand = io.read("tmp__.xyz")
    connecting_positions = []
    for idx, conn_idx in enumerate(connecting_indices):
        #print(ligand.numbers[conn_idx])
        connecting_positions.append(ligand.positions[conn_idx])

    tmqm_xyz = io.read("all_relevant_xyzs/{}.xyz".format(tmqm_id))
    metal_coord = tmqm_xyz.positions[np.argmax(tmqm_xyz.numbers)]

    # add the tmqm ligand
    positions = np.append([metal_coord], ligand.positions, 0)
    atomic_nums = [26]+list(ligand.numbers)
    structure = Atoms(atomic_nums, positions)
    # need one of the connecting points to be origin
    tetra_pos = list(list(connecting_positions)-connecting_positions[0])
    tetra_pos = [list(x) for x in tetra_pos]
    structure.positions = structure.positions-connecting_positions[0]
    coord = get_axial(structure.positions[0], tetra_pos)
    if coord is None:
        #print('No coordinate returned, stopping')
        return
    structure = add_ligand(structure, coord, ligand2add)
    metal_pos = structure.positions[0]
    intermediate_atoms = Atoms(symbols=[8, 1], 
                               positions=[2*metal_pos, 2*metal_pos+[0.8, 0, 0]])
    structure = add_ligand(structure, 2*metal_pos, intermediate_atoms)
    structure.positions = structure.positions-structure.positions[-len(intermediate_atoms)]
    structure = optimise_ligand_position(structure, tmqm_id, 
                                         len(structure)-len(intermediate_atoms), len(structure),
                                         "")

    structure.positions = structure.positions-structure.positions[-len(ligand2add)-len(intermediate_atoms)]
    #print("Trying to opt adding pyridine...")
    structure = optimise_ligand_position(structure, tmqm_id, 
                                        len(structure)-len(ligand2add)-len(intermediate_atoms), 
                                         len(structure)-len(intermediate_atoms),
                                        "tet_final_{}".format(describer), 
                                         add_min=True, 
                                         min_indices=(len(structure)-len(ligand2add)-len(intermediate_atoms), len(structure)))


    

In [8]:
water_atoms = [8, 1, 1]
water_pos = np.array([
    [0.0, 0.0, 0.0],
    [0.75, 0.00, 0.6],
    [-0.75, 0.00, 0.6]
    ])
water = Atoms(symbols=water_atoms,
             positions=water_pos)

oh_atoms = [8, 1]
oh_pos = np.array([[0.0, 0.0, 0.0],
                  [0.8, 0.0, 0.0]]
                 )
oh_lig = Atoms(symbols=oh_atoms,
              positions=oh_pos)
pyr_atoms = [7, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1]
pyr_pos = np.array([[0.6816, 1.1960, 0.0000],
           [1.3603, 0.0256, 0.0000],
           [0.6971, -1.2020, 0.0000],
           [-0.6944, -1.2184, 0.0000],
           [ -1.3895, -0.0129, 0.0000],
           [-0.6712, 1.1834, 0.0000],
           [2.4530, 0.1083, 0.0000],
           [ 1.2665, -2.1365, 0.0000],
           [-1.2365, -2.1696, 0.0000],
           [-2.4837, 0.0011, 0.0000],
           [-1.1569, 2.1657, 0.0000]
          ])
pyr_pos[:,0] = pyr_pos[:,0]-0.6816
pyr_pos[:,1] = pyr_pos[:,1]-1.1960
pyridine = Atoms(symbols=pyr_atoms,
                 positions=pyr_pos)


In [13]:
string = """select ccdc_id, denticity, coordinating_elements, coordinating_indices, xyz from ligand_data 
            where (coordinating_elements like '%[8]%' or coordinating_elements like '%[7]%' 
            or coordinating_elements like '%[8, 7]%' or coordinating_elements like '%[7, 8]%') 
            and (denticity=3)"""# and (original_metal=25 or original_metal=26 or original_metal=27 or original_metal=28)"""

distinct_ids = pickle.load(open("ccdc_ids_distinct.p", "rb"))

query_res = list(cur.execute(string))
for row in tqdm(query_res):
    if row[0] not in distinct_ids:
        continue
    print(row[0])
    
    add2tridentate(row[4], json.loads(row[3]), row[0], pyridine, pyridine, 'both_pyridine')
    add2tridentate(row[4], json.loads(row[3]), row[0], pyridine, water, 'pyridine_water')
    add2tridentate(row[4], json.loads(row[3]), row[0], water, pyridine, 'water_pyridine')
    # Now do trans-oriented ligands
    add2tridentate_trans(row[4], json.loads(row[3]), row[0], pyridine, pyridine, 'both_pyridine')
    add2tridentate_trans(row[4], json.loads(row[3]), row[0], pyridine, water, 'pyridine_water')
    

  0%|          | 0/5234 [00:00<?, ?it/s]

VEJNUW



divide by zero encountered in double_scalars



ZAJGUP


KeyboardInterrupt: 

In [14]:
string = """select ccdc_id, denticity, coordinating_elements, coordinating_indices, xyz from ligand_data 
            where (coordinating_elements like '%[8]%' or coordinating_elements like '%[7]%' 
            or coordinating_elements like '%[8, 7]%' or coordinating_elements like '%[7, 8]%') 
            and (denticity=4)"""

distinct_ids = pickle.load(open("ccdc_ids_distinct_tetra.p", "rb"))

query_res = list(cur.execute(string))
for row in tqdm(query_res):
    if row[0] not in distinct_ids:
        continue
    print(row[0])
    add2tetra(row[4], json.loads(row[3]), row[0], pyridine, 'pyridine')
    add2tetra(row[4], json.loads(row[3]), row[0], water, 'water')



  0%|          | 0/4081 [00:00<?, ?it/s]

QONGOR



divide by zero encountered in double_scalars



POGGOI
LOMZOE
JOJFAS
GABWEN
NOEPOR
SUCBAT
UYAJIO
LODSEC
IPOVIS
TEYHEL
FITDER
XUCJAI
YIBPAB
PAZBAW
XIJQUC
QAGXEE
GANHEJ
WOPZOQ
TEMKOL
QIBDUD
IWAMAU
FIYQOV
QIPVOC
FEKJEL
AMAROX
GETXAE
XAPJOO
YEGMUT
EGIXUP
YAJLUT
KAWTUY
QUXNAZ
GAQFEM
TEVPIW
LUXFEP
QOHHAW
OYUHIB
PAQFOF
ZICFEW
FEJLAJ
BATLIT
LOKLAB
JAZSAE
WOVNUQ
DOHXED
SOKQUE
RAXZUM
ITERAB
BIBGOJ
NIPXUF
UGINII
KILQON
JIYFIJ
NIMNIH
LEHMES
YEJLOR
NOFVIN
WATTAM
VAVRAN
CALQIS
HATBEL
DOVZUK
YIWTEG
LIYDOM
QOLPUC
QOXVAA
NOQGAB
XOMKUG
TEHGAQ
KULQOZ
SEHRED
TUKNAO
TIDCAL
HIVKAY
KIKHOE
FASCUA
ZUDNET
TANYOW
HAZNIG
LEYSOY
MEYMEI
UVULAZ
IJORUW
HAGXUI
VILBEX
VAQPAE
EWASUR
XISJIS
QOMTIV
HUBZAG
VEGFIX
GEZNEE
YOBXIZ
IRUPOC
OHEDOU
JUHFID
KIKHUK
EBARIK
MEBTAO
DUFLEW
CEXTAB
HOLTEK
KIKJOG
WAQRUB
NIQLAD
HAKBIF
ULAMUQ
POFBAO
ELAZOH
YUYHOR
BUXVOG
IMBPNI
KUBFUJ
TAPZOC
VAJGIY
DAHCAQ
JAMMUI
MUSSIC
DUPWER
DUQMIN
IPITOQ
TEKPIL
KACVOB
XELMOR
FEKRAP
WABDEK
TEZRIC
ZEDGOE
FOWLIN
SESLIN
SILKON
RUTPOL
MUSQIA
RIHWAG
MIRNUW
TEBKIW
ZUTVER
QEVCEA
DUQFAY
LOPCAW
CAJKEG
ALEHOO
GECZIA

KAYPEH
CUSWUK
FEYPOR
KETMUT
KUBFIX
HACFEY
LUGYOB
KEDKEM
GAJKOR
QITBEA
HAZNOM
MERGUL
MIQBOE
FIXMEF
KOSQOA
ZAGXOX
QAPDUH
IVEKAX
MEBDAY
RORXEB
GIDMAJ
CEYWEK
CUQVIV
YEGLAZ
YOBXOF
PANNEX
NEFHAI
ZEJLUX
OHUBAV
LERPUW
ZEHQOU
FICJIN
RUSLOI
KOHHIY
BASDAC
XEVWUR
KIKHIY
AGACAP
QEMDOB
FIRJEW
ULOJOV
HAXQUT
TAMLOI
GIGMEO
NOPMAJ
UGEXIO
PIDHOZ
KOLXIV
DIGMAH
HADZOC
XUQWOX
UMOQOD
CEBKEA
XEHGUN
ABUKUG
EZIQAF
HOGQEZ
COFCIL
FOPHUP
KEWKED
NEXTAO
XACGAJ
VOYFOG
FUNRIP
BOJBAG
POKMEJ
RADTUL
COBTOE
KESLIE
WIDGOG
CIMGAG
CIFHIJ
FOPHOI
LUJCOK
YUMMOJ
CASNOC
LURFIN
KEMTED
KIBKOW
EYECAO
GELSAS
AGUZAD
PEHHOC
EKAPAJ
PEBXUP
LAHTOD
FUYVOM
DUNNIL
ZAGYEO
BAWKOA
JEJSAV
VESTIY
BAYKOD
SEWCUT
GAHNIP
SOWMIA
EKEGEI
IJOSOP
MINZEO
REWCUT
HOGFIV
XUCHEJ
CAXJAP
KESWIP
GAXXIO
WIWXAB
QAKLET
SAHYIM
MAHZUR
HEHWID
PICPID
QIZTUP
EYOMEK
JAYQAC
YICKUQ
POJHED
DOMXOU
AKOTUQ
HEWTAE
HUTMEO
JOKFAR
XEFMOL
ZUTQEK
ADAREF
IBOCEI
RECXUU
QOXXIL
QAKJER
SECXUU
UGAJET
XANDOI
VETPIT
BINFAH
MENLAS
VOCXEQ
ZEGGUN
ROLYIB
GEHYIB
ZAKGOH
GIQZIP
NALRUP
FIDVUM
WOMJUG

In [None]:
string = """select ccdc_id, denticity, coordinating_elements, coordinating_indices, xyz from ligand_data 
            where (coordinating_elements like '%[8]%' or coordinating_elements like '%[7]%' 
            or coordinating_elements like '%[8, 7]%' or coordinating_elements like '%[7, 8]%') 
            and (denticity=4)"""

query_res = list(cur.execute(string))
