In [2]:
# Standard library.
import os
import sys
import time

# Set before TensorFlow is imported further down in this cell.
# NOTE(review): '-1' presumably hides all CUDA devices so TensorFlow runs on
# the CPU only - confirm this is intended for the preprocessing run.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Extend the module search path before the local `GraphBuilder` import below.
# NOTE(review): presumably GraphBuilder.py lives in ../source - confirm.
sys.path.append('../source')
from scipy.spatial.distance import cdist
from rdkit.Chem.rdmolfiles import MolFromSmiles
from rdkit.Chem.rdmolops import AddHs

import graph_nets as gn
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Project-local module; provides the GraphBuilder class used by the cells below.
import GraphBuilder

In [5]:
# Dimers containing any element outside this set are discarded when the data is read.
ALLOWED_ELEMENTS = {'H', 'C', 'N', 'O', 'F', 'S', 'Cl'}
# Minimum number of atoms each monomer must have for a dimer to be kept.
MIN_SIZE = 2
# Used below (via `from_coords`) to build one graph per monomer from coordinates and elements.
builder = GraphBuilder.GraphBuilder()

In [6]:
# Parse the comma-separated data file '24853016' into per-dimer lists.
#
# A row is kept only if
#   * both monomers are neutral (charge_0 == charge_1 == 0),
#   * both monomers have at least MIN_SIZE atoms, and
#   * every element symbol in the row is contained in ALLOWED_ELEMENTS.
#
# For every kept row the coordinates and element symbols are split into the
# two monomers: the first n_atoms_0 atoms belong to monomer 0, the remaining
# n_atoms_1 atoms to monomer 1.
dimer_coordinates, dimer_smiles, dimer_elements, dimer_energies_sns, dimer_energies_sapt = [], [], [], [], []
geometry_indices, k_indices, group_ids, system_ids = [], [], [], []

with open('24853016', 'r') as file:
    # Skip the header row; the default keeps an empty file from raising StopIteration.
    next(file, None)
    # Data starts on physical line 2; the counter is only used for error messages.
    for line_number, line in enumerate(file, start=2):
        # The unpacking doubles as a column-count check: a row with a different
        # number of fields raises ValueError here.
        (smile_0, smile_1, charge_0, charge_1, n_atoms_0, n_atoms_1, system_id, group_orig, group_id,
         k_index, geom_id, qz_HF, qz_MP2_os, qz_MP2_ss, qz_MP2_all, tz_HF, tz_MP2_os, tz_MP2_ss,
         tz_MP2_all, cbs_MP2_os, cbs_MP2_ss, cbs_MP2_all, espx_HF_es, espx_HF_hl, espx_HF_ovl, espx_MP2_es,
         espx_MP2_ovl, sapt_es, sapt_ex, sapt_exs2, sapt_ind, sapt_exind, sapt_disp, sapt_exdisp_os, sapt_exdisp_ss,
         sapt_delta_HF, sapt_all, nn_CCSDT_all, nn_CCSDT_all_05, nn_CCSDT_all_95, xyz, elements) = line.split(',')
        charge_0, charge_1 = int(charge_0), int(charge_1)
        n_atoms_0, n_atoms_1 = int(n_atoms_0), int(n_atoms_1)
        if charge_0 == 0 and charge_1 == 0 and n_atoms_0 >= MIN_SIZE and n_atoms_1 >= MIN_SIZE:
            sns_cc_potential = float(nn_CCSDT_all)
            # Column order of the stored SAPT array: total first, then the individual terms.
            sapt_potentials = (
                float(sapt_all), float(sapt_es), float(sapt_ind), float(sapt_exind), float(sapt_delta_HF),
                float(sapt_ex), float(sapt_exs2), float(sapt_disp), float(sapt_exdisp_os), float(sapt_exdisp_ss),
            )
            # `xyz` is a whitespace-separated flat list of numbers, reshaped to (n_atoms, 3).
            coordinates = np.array([float(c) for c in xyz.split()]).reshape((-1, 3))
            # str.split() also discards the trailing newline of the last CSV field.
            elements = elements.split()
            if not (set(elements) - ALLOWED_ELEMENTS):
                n_atoms_total = n_atoms_0 + n_atoms_1
                if len(coordinates) != n_atoms_total or len(elements) != n_atoms_total:
                    raise ValueError(
                        f'line {line_number}: expected {n_atoms_total} atoms, got '
                        f'{len(coordinates)} coordinates and {len(elements)} elements'
                    )
                # Plain slicing is enough to separate the monomers; no per-row
                # TensorFlow round trip is needed for this.
                coordinates_f32 = coordinates.astype(np.float32)
                coordinates_0, coordinates_1 = coordinates_f32[:n_atoms_0], coordinates_f32[n_atoms_0:]
                elements_0, elements_1 = np.array(elements[:n_atoms_0]), np.array(elements[n_atoms_0:])
                dimer_coordinates.append((coordinates_0, coordinates_1))
                dimer_smiles.append((smile_0, smile_1))
                dimer_elements.append((elements_0, elements_1))
                dimer_energies_sns.append(sns_cc_potential)
                dimer_energies_sapt.append(sapt_potentials)
                geometry_indices.append(int(geom_id))
                k_indices.append(int(k_index))
                group_ids.append(group_id)
                system_ids.append(system_id)
# NOTE(review): 4.184 is presumably the kcal/mol -> kJ/mol conversion factor;
# confirm the units of the source columns against the dataset documentation.
dimer_energies_sapt = np.array(dimer_energies_sapt) * 4.184
dimer_energies_sns = np.array(dimer_energies_sns) * 4.184
# The id columns were collected as strings; NumPy parses them into integers here.
group_ids = np.array(group_ids, dtype=np.int32)
system_ids = np.array(system_ids, dtype=np.int32)
k_indices = np.array(k_indices, dtype=np.int32)

2023-01-16 09:49:10.785956: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-01-16 09:49:10.785999: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: usa.ethz.ch
2023-01-16 09:49:10.786010: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: usa.ethz.ch
2023-01-16 09:49:10.786079: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.39.1
2023-01-16 09:49:10.786118: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 510.39.1
2023-01-16 09:49:10.786126: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 510.39.1
2023-01-16 09:49:10.786394: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructio

In [7]:
# Collect the individual dimer geometries into one record per `group_id`.
#
# Each record stores per-k dictionaries ('energies', 'sapt', 'coordinates')
# plus the group-level 'smiles', 'elements' and monomer 'graphs'. A group is
# only created once the graphs built from a geometry have as many undirected
# edges (n_edge // 2) as the hydrogen-completed SMILES molecules have bonds;
# geometries seen while the group does not exist yet and that fail this check
# are printed and skipped.
dissociation_curves_temp = {}
dimer_records = zip(dimer_coordinates, dimer_smiles, dimer_elements, dimer_energies_sns,
                    dimer_energies_sapt, k_indices, group_ids)
for index, (coordinates, smile, elements, energy_sns, energy_sapt, k_index, group_id) in enumerate(dimer_records):
    smile_1, smile_2 = smile

    # Skip dimers that contain any element outside the allowed set.
    if set(np.hstack(elements)) - ALLOWED_ELEMENTS:
        continue

    if group_id not in dissociation_curves_temp:
        # Reference bond counts from the SMILES strings with explicit hydrogens.
        bonds_1 = AddHs(MolFromSmiles(smile_1)).GetNumBonds()
        bonds_2 = AddHs(MolFromSmiles(smile_2)).GetNumBonds()
        # Graphs derived from the actual geometry of each monomer.
        graph_1 = builder.from_coords(coordinates[0], elements[0])
        graph_2 = builder.from_coords(coordinates[1], elements[1])
        edges_1 = graph_1.n_edge // 2
        edges_2 = graph_2.n_edge // 2
        if bonds_1 != edges_1 or bonds_2 != edges_2:
            print(smile, elements)
            continue
        dissociation_curves_temp[group_id] = {
            'energies': {},
            'sapt': {},
            'coordinates': {},
            'smiles': smile,
            'elements': elements,
            'graphs': (graph_1, graph_2),
        }

    # Store this geometry under its k index within the group.
    group_record = dissociation_curves_temp[group_id]
    group_record['energies'][k_index] = energy_sns
    group_record['sapt'][k_index] = energy_sapt
    group_record['coordinates'][k_index] = coordinates

In [8]:
# Turn the per-k dictionaries in `dissociation_curves_temp` into dense arrays,
# ordered by ascending k index, with one entry per group.
#
# Resulting keys per group:
#   'energies'          float32 array, one value per k index
#   'sapt'              float32 array, one row of SAPT terms per k index
#   'distance_matrices' float32 array of monomer-0 x monomer-1 distances per k index
#   'coordinates'       (monomer 0 coordinates, monomer 1 coordinates), float32, stacked over k
#   'smiles', 'elements', 'graphs'  copied unchanged from the temporary record
#   'k_indices'         the sorted k indices the arrays above are aligned with
dissociation_curves = {}
for key, group_record in dissociation_curves_temp.items():
    # Deliberately NOT named `k_indices`: that name holds the per-dimer array
    # consumed by the grouping cell, and rebinding it here would make a re-run
    # of that cell silently truncate its zip() to a handful of dimers.
    sorted_k_indices = np.sort(list(group_record['energies'].keys()))
    curve, dmats, coords, sapt = [], [], [], []
    for k_index in sorted_k_indices:
        monomer_coordinates = group_record['coordinates'][k_index]
        curve.append(group_record['energies'][k_index])
        # Pairwise distances between the atoms of monomer 0 and monomer 1.
        dmats.append(cdist(*monomer_coordinates).astype(np.float32))
        coords.append(monomer_coordinates)
        sapt.append(group_record['sapt'][k_index])
    coords_1 = np.array([x[0] for x in coords], dtype=np.float32)
    coords_2 = np.array([x[1] for x in coords], dtype=np.float32)
    dissociation_curves[key] = {
        'energies': np.array(curve, dtype=np.float32),
        'sapt': np.array(sapt, dtype=np.float32),
        'distance_matrices': np.array(dmats, dtype=np.float32),
        'coordinates': (coords_1, coords_2),
        'smiles': group_record['smiles'],
        'elements': group_record['elements'],
        'graphs': group_record['graphs'],
        'k_indices': sorted_k_indices,
    }

In [9]:
# Persist the assembled dictionary. np.save appends '.npy' to the name and
# stores a non-array object like this dict by pickling it.
np.save(file='DES5M', arr=dissociation_curves)