In [1]:
from photocatalysis.evaluate import evaluate_substrate_in_batches, evaluate_substrate

In [2]:
import numpy as np
from photocatalysis.adsorption.relaxing import build_and_relax_configurations, filter_configurations
from photocatalysis.conformers import get_conformers_rdkit as get_conformers


In [3]:
from rdkit import Chem

In [4]:
from copy import deepcopy

In [5]:
# Enable IPython's autoreload extension so that imported modules are reloaded
# automatically before code runs; edits to the package source then take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Substrates used in this notebook, written as SMILES strings. Only the last
# entry is active; the first two are kept here commented out.
smile_string_list = [
    # 'C1=CC(c2cc(C=Cc3ncns3)cc(C3=CCC=C3)n2)=CC1',
    # 'C1=CCC(c2cc(C3=CC=CC3)cc(-c3cc[nH]c3)c2)=C1',
    'C1=CCC(c2ccnnc2-c2nnccc2C2=CC=CC2)=C1',
]

In [23]:
# Run the conformer set-up twice for the same substrate: once with one CPU and
# once with four, so the two code paths can be compared step by step below.
confs, cgg = get_conformers(smile_string_list[0], n_cpu=1)          # serial
confs_multi, cggm = get_conformers(smile_string_list[0], n_cpu=4)   # multi-CPU

# Show the CPU count each returned generator object was configured with.
print(*(generator.n_cpu for generator in (cgg, cggm)))

Number of rotatable bonds: 3, C1=CCC(c2ccnnc2-c2nnccc2C2=CC=CC2)=C1
Number of rotatable bonds: 3, C1=CCC(c2ccnnc2-c2nnccc2C2=CC=CC2)=C1
1 4


In [24]:
# EMBEDDING
# Embed conformers with the serial and the multi-CPU generator. Both calls pass
# userandom=False so the results can be compared directly in the next cell
# (presumably this disables random starting coordinates — TODO confirm).
test = cgg.embed_molecule(confs, userandom=False)
testm = cggm.embed_molecule(confs_multi, userandom=False)

In [25]:
### embed_molecule produces the same conformers!... errors must be arising elsewhere
# Compare each serial/multi-CPU conformer pair by the largest absolute
# coordinate deviation. A plain signed sum of the differences can cancel to 0.0
# even when the two geometries differ, so it is not a reliable equality check.
for c, cm in zip(test.GetConformers(), testm.GetConformers()):
    print(np.abs(c.GetPositions() - cm.GetPositions()).max())

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [26]:
# MINIMIZING
# Minimize the embedded conformers with each generator. The return values are
# not captured; the next cell reads the geometries back from `test` / `testm`,
# so these calls presumably update the conformers in place — TODO confirm.
cgg.minimize_conformers(test)
cggm.minimize_conformers(testm)

In [27]:
### Minimized the same
# Largest absolute coordinate deviation per conformer pair; unlike a signed
# sum, positive and negative differences cannot cancel each other out here.
for c, cm in zip(test.GetConformers(), testm.GetConformers()):
    print(np.abs(c.GetPositions() - cm.GetPositions()).max())

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [28]:
### Getting into pruning
# Conformer energies as computed by the serial and the multi-CPU generator.
# NOTE(review): neither result is used again in the cells visible here.
energies = cgg.get_conformer_energies(test)
energiesm = cggm.get_conformer_energies(testm)

In [29]:
# Work on deep copies so the originals (`test`, `testm`) stay available for the
# before/after comparisons in the following cells.
test_copy = deepcopy(test)
testm_copy = deepcopy(testm)
# The two runs go through different methods: get_conformer_rmsd for the serial
# generator and get_conformer_rmsd_multiproc for the multi-CPU one.
rmsd_copy = cgg.get_conformer_rmsd(test_copy)
rmsdm_copy = cggm.get_conformer_rmsd_multiproc(testm_copy)

In [30]:
# Serial case: check whether cgg.get_conformer_rmsd modified the molecule it was
# given, by comparing the untouched `test` against `test_copy` (the object that
# was passed to the RMSD call).
# NOTE(review): the original note here stated that the serial case DOES change
# its input, possibly because conformers are aligned in place — confirm against
# the values printed below; 0.0 for every conformer means no change was detected.
# The largest absolute deviation is used because a signed sum can cancel to 0.0.
for j in range(test.GetNumConformers()):
    print(np.abs(test.GetConformer(j).GetPositions() - test_copy.GetConformer(j).GetPositions()).max())

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [31]:
# Parallel case doesn't change the original molecule: compare the untouched
# `testm` against `testm_copy` (the object passed to get_conformer_rmsd_multiproc).
# The largest absolute deviation is used because a signed sum can cancel to 0.0
# even when coordinates differ.
for j in range(testm.GetNumConformers()):
    print(np.abs(testm.GetConformer(j).GetPositions() - testm_copy.GetConformer(j).GetPositions()).max())

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [32]:
# PRUNING (ERRONEOUS STEP)
# Prune deep copies so that `test` / `testm` themselves are not touched. Each
# call returns three values, unpacked here as keep / discard / new; the cells
# below compare `keep` and the conformers of `new` between the two runs.
keep, discard, new = cgg.prune_conformers(deepcopy(test))
keepm, discardm, newm = cggm.prune_conformers(deepcopy(testm))

In [33]:
# Pair up the `keep` results of the serial and the multi-CPU pruning so they
# can be read side by side.
[(serial_entry, multi_entry) for serial_entry, multi_entry in zip(keep, keepm)]

[(14, 14),
 (22, 22),
 (7, 7),
 (21, 21),
 (34, 34),
 (38, 38),
 (16, 16),
 (32, 32),
 (47, 47),
 (3, 3),
 (24, 24)]

In [35]:
### Final Test
# Compare the pruned conformers of the serial and multi-CPU runs pair by pair,
# using the largest absolute coordinate deviation (a signed sum could cancel to
# 0.0 and hide a real difference).
for c, cm in zip(new.GetConformers(), newm.GetConformers()):
    print(np.abs(c.GetPositions() - cm.GetPositions()).max())

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


------ DEBUG FINISHED?

In [37]:
# Scratch directory handed to the evaluation calls below; the commented-out
# line is an alternative location.
# scratch_dir = '/home/btpq/bt308495/Thesis/scratch'
scratch_dir = '/home/scakolli/Thesis/scratch'

# Calculator settings forwarded to the evaluation functions
# (presumably xTB options — TODO confirm).
calc_kwargs = dict(gfn=2, acc=0.2, etemp=298.15, strict='', gbsa='water')

In [39]:
# Evaluate the first substrate through the single-substrate entry point. The
# result is the one labelled "Serial" in the comparisons further down.
out = evaluate_substrate(smile_string_list[0], calc_kwargs, scratch_dir=scratch_dir)

2023-07-10 13:11:12,493 | INFO: Preparing substrate


In [40]:
# Evaluate the same substrate list through the batched entry point. The result
# is the one labelled "Parrallel" in the comparisons further down.
outp = evaluate_substrate_in_batches(smile_string_list, calc_kwargs, scratch_dir=scratch_dir)

############################
2023-07-10 13:11:33,187 | INFO: PREPARING SUBSTRATES, BATCH None
2023-07-10 13:11:33,189 | INFO: substrate preparation jobs to do: 1
2023-07-10 13:11:54,254 | INFO: finished jobs. Took 21.063460500001383s
2023-07-10 13:11:54,261 | INFO: FIZZLED: 0


In [42]:
# direct = '/home/btpq/bt308495/Thesis/scratch/config1'
# Directory for the relaxation runs of the serial result. It is derived from
# scratch_dir instead of repeating the absolute path, so the two settings
# cannot drift apart; with the scratch_dir set above it evaluates to
# '/home/scakolli/Thesis/scratch/config'.
direct = f'{scratch_dir}/config'
# Build and relax the configurations for `out`, using the equivalent-atom
# information stored on it.
configs = build_and_relax_configurations(out, out.info['equivalent_atoms'], directory=direct)

2023-07-10 13:12:15,405 | INFO: opt loose jobs to do: 36
2023-07-10 13:12:44,270 | INFO: finished jobs. Took 28.86383290000049s


In [43]:
# Split the relaxed configurations into their three groups
# (presumably the OH, O and OOH adsorbate sets — TODO confirm).
oh, o, ooh = configs

In [44]:
# Filter each of the three serial groups against `out`, in the order oh, o, ooh.
# A divider line is printed between consecutive groups so the reports can be
# told apart in the cell output.
filtered_serial = []
for position, group in enumerate((oh, o, ooh)):
    if position > 0:
        print('########')
    filtered_serial.append(filter_configurations(group, out))
oh_filt, o_filt, ooh_filt = filtered_serial



[0] True /home/scakolli/Thesis/scratch/config/run_0
[1] True /home/scakolli/Thesis/scratch/config/run_1
[3] True /home/scakolli/Thesis/scratch/config/run_2
[4] True /home/scakolli/Thesis/scratch/config/run_3
[5] True /home/scakolli/Thesis/scratch/config/run_4
[6] True /home/scakolli/Thesis/scratch/config/run_5
[7] True /home/scakolli/Thesis/scratch/config/run_6
[7] True /home/scakolli/Thesis/scratch/config/run_7
[8] True /home/scakolli/Thesis/scratch/config/run_8
[8] True /home/scakolli/Thesis/scratch/config/run_9
[9] True /home/scakolli/Thesis/scratch/config/run_10
[17] True /home/scakolli/Thesis/scratch/config/run_11
########
[0, 1] True /home/scakolli/Thesis/scratch/config/run_12
[0, 1] True /home/scakolli/Thesis/scratch/config/run_13
[3, 21] True /home/scakolli/Thesis/scratch/config/run_14
[4, 5] True /home/scakolli/Thesis/scratch/config/run_15
[5, 6] True /home/scakolli/Thesis/scratch/config/run_16
[5, 6] True /home/scakolli/Thesis/scratch/config/run_17
[6, 7] True /home/scakolli/

In [45]:
# directp = '/home/btpq/bt308495/Thesis/scratch/configp'
# Directory for the relaxation runs of the batched result. It is derived from
# scratch_dir instead of repeating the absolute path, so the two settings
# cannot drift apart; with the scratch_dir set above it evaluates to
# '/home/scakolli/Thesis/scratch/configp'.
directp = f'{scratch_dir}/configp'
# Build and relax the configurations for `outp`, using the equivalent-atom
# information stored on it.
configsp = build_and_relax_configurations(outp, outp.info['equivalent_atoms'], directory=directp)

2023-07-10 13:12:52,774 | INFO: opt loose jobs to do: 36
2023-07-10 13:13:24,408 | INFO: finished jobs. Took 31.63320139999996s


In [46]:
# Split the batched-run configurations into their three groups, mirroring the
# unpacking of `configs` (presumably OH, O and OOH adsorbate sets — TODO confirm).
ohp, op, oohp = configsp

In [47]:
# Filter each of the three batched groups against `outp`, in the order
# ohp, op, oohp. A divider line is printed between consecutive groups so the
# reports can be told apart in the cell output.
filtered_batched = []
for position, group in enumerate((ohp, op, oohp)):
    if position > 0:
        print('########')
    filtered_batched.append(filter_configurations(group, outp))
ohp_filt, op_filt, oohp_filt = filtered_batched



[0] True /home/scakolli/Thesis/scratch/configp/run_0
[1] True /home/scakolli/Thesis/scratch/configp/run_1
[3] True /home/scakolli/Thesis/scratch/configp/run_2
[4] True /home/scakolli/Thesis/scratch/configp/run_3
[5] True /home/scakolli/Thesis/scratch/configp/run_4
[6] True /home/scakolli/Thesis/scratch/configp/run_5
[7] True /home/scakolli/Thesis/scratch/configp/run_6
[7] True /home/scakolli/Thesis/scratch/configp/run_7
[8] True /home/scakolli/Thesis/scratch/configp/run_8
[8] True /home/scakolli/Thesis/scratch/configp/run_9
[9] True /home/scakolli/Thesis/scratch/configp/run_10
[17] True /home/scakolli/Thesis/scratch/configp/run_11
########
[0, 1] True /home/scakolli/Thesis/scratch/configp/run_12
[0, 1] True /home/scakolli/Thesis/scratch/configp/run_13
[3, 21] True /home/scakolli/Thesis/scratch/configp/run_14
[4, 5] True /home/scakolli/Thesis/scratch/configp/run_15
[5, 6] True /home/scakolli/Thesis/scratch/configp/run_16
[5, 6] True /home/scakolli/Thesis/scratch/configp/run_17
[6, 7] Tr

In [48]:
# Active sites of `oh_filt` and `ohp_filt`, paired entry by entry so the serial
# and batched runs can be compared at a glance.
[
    (serial_cfg.info['active_site'], batched_cfg.info['active_site'])
    for serial_cfg, batched_cfg in zip(oh_filt, ohp_filt)
]

[(0, 0),
 (1, 1),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9),
 (17, 17)]

In [49]:
# Same side-by-side comparison for `o_filt` and `op_filt`: one
# (serial, batched) tuple of active sites per pair of configurations.
[tuple(cfg.info['active_site'] for cfg in pair) for pair in zip(o_filt, op_filt)]

[('[0, 1]', '[0, 1]'),
 ('[3, 21]', '[3, 21]'),
 ('[4, 5]', '[4, 5]'),
 ('[5, 6]', '[5, 6]'),
 ('[6, 7]', '[6, 7]'),
 (7, 7),
 (8, 8),
 ('[8, 9]', '[8, 9]'),
 ('[16, 17]', '[16, 17]')]

In [50]:
# Active-site lists of `ooh_filt` and `oohp_filt`, shown as a
# (serial, batched) tuple of lists.
tuple([cfg.info['active_site'] for cfg in group] for group in (ooh_filt, oohp_filt))

([1, 3, 5, 6, 8, 9, 17], [1, 3, 5, 6, 8, 9, 17])

In [51]:
# Re-run the filter on the ooh groups of both runs, each under a labelled
# header. The result of the first call is discarded; the second call is the
# last expression of the cell, so its return value is displayed.
print('########', 'Config - Serial', sep='\n')
filter_configurations(ooh, out)
print('########', 'Configp - Parrallel', sep='\n')
filter_configurations(oohp, outp)

########
Config - Serial
[1] True /home/scakolli/Thesis/scratch/config/run_24
[0, 1] False /home/scakolli/Thesis/scratch/config/run_25
[3] True /home/scakolli/Thesis/scratch/config/run_26
[] False /home/scakolli/Thesis/scratch/config/run_27
[5] True /home/scakolli/Thesis/scratch/config/run_28
[6] True /home/scakolli/Thesis/scratch/config/run_29
[] False /home/scakolli/Thesis/scratch/config/run_30
[7] False /home/scakolli/Thesis/scratch/config/run_31
[] False /home/scakolli/Thesis/scratch/config/run_32
[8] True /home/scakolli/Thesis/scratch/config/run_33
[9] True /home/scakolli/Thesis/scratch/config/run_34
[17] True /home/scakolli/Thesis/scratch/config/run_35
########
Configp - Parrallel
[1] True /home/scakolli/Thesis/scratch/configp/run_24
[0, 1] False /home/scakolli/Thesis/scratch/configp/run_25
[3] True /home/scakolli/Thesis/scratch/configp/run_26
[] False /home/scakolli/Thesis/scratch/configp/run_27
[5] True /home/scakolli/Thesis/scratch/configp/run_28
[6] True /home/scakolli/Thesis

[Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False),
 Atoms(symbols='C7N2C2N2C9H14O2H', pbc=False)]

In [52]:
from photocatalysis.thermodynamics.thermodynamics import global_min_configurations

In [53]:
# Select the minimum-energy configurations from the three filtered groups, once
# for the serial run and once for the batched run, so they can be compared below.
min_energy_configs = global_min_configurations(oh_filt, o_filt, ooh_filt)
min_energy_configsp = global_min_configurations(ohp_filt, op_filt, oohp_filt)

In [54]:
# Show the info dictionaries of the serial and batched minimum-energy
# configurations next to each other; the row of '#' characters only acts as a
# visual separator between entries in the displayed output.
[
    (serial_min.info, batched_min.info, ['################'])
    for serial_min, batched_min in zip(min_energy_configs, min_energy_configsp)
]

[({'energy': -1682.3013156511793,
   'walltime': 4.956,
   'cputime': 4.891,
   'fname': '/home/scakolli/Thesis/scratch/config/run_1',
   'active_site': 1},
  {'energy': -1682.3013156511793,
   'walltime': 5.661,
   'cputime': 5.297,
   'fname': '/home/scakolli/Thesis/scratch/configp/run_1',
   'active_site': 1},
  ['################']),
 ({'energy': -1668.8647128400255,
   'walltime': 1.836,
   'cputime': 1.734,
   'fname': '/home/scakolli/Thesis/scratch/config/run_13',
   'active_site': '[0, 1]'},
  {'energy': -1668.8647128400255,
   'walltime': 1.981,
   'cputime': 1.906,
   'fname': '/home/scakolli/Thesis/scratch/configp/run_13',
   'active_site': '[0, 1]'},
  ['################']),
 ({'energy': -1790.9420144516737,
   'walltime': 6.854,
   'cputime': 6.484,
   'fname': '/home/scakolli/Thesis/scratch/config/run_24',
   'active_site': 1},
  {'energy': -1790.9420144516737,
   'walltime': 8.288,
   'cputime': 7.531,
   'fname': '/home/scakolli/Thesis/scratch/configp/run_24',
   'activ

In [56]:
# Energies of the first configuration group from the serial (`configs`) and the
# batched (`configsp`) run; their element-wise difference is displayed as the
# cell result.
e = np.array([atoms.info['energy'] for atoms in configs[0]])
ep = np.array([atoms.info['energy'] for atoms in configsp[0]])
np.subtract(e, ep)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# for k in out.info:
#     print(k, out.info[k] == outp.info[k])