In [20]:
import pandas as pd
from rdkit.Chem import PandasTools
from psikit import Psikit

In [21]:
# Read the SMILES table from the working directory and preview ten rows.
esol_data = pd.read_csv(filepath_or_buffer='smiles.csv')
esol_data.head(n=10)

Unnamed: 0,smiles
0,C=C
1,C=O
2,C#N
3,CCO
4,CCC(=O)O
5,CN=C=O
6,C1=CCC=CC1
7,C1=CC=CC=C1
8,C1=C(C)C=CC=C1
9,C1CCC2CCCCC2C1


In [22]:
# Parse every SMILES string into an RDKit molecule, stored in place in the
# 'ROMol' column (spelled out here; it is also the function's default name).
PandasTools.AddMoleculeColumnToFrame(
    frame=esol_data,
    smilesCol='smiles',
    molCol='ROMol',
)
esol_data.head(n=10)

Unnamed: 0,smiles,ROMol
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7900>
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7890>
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7660>
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x150bcedf70b0>
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf79e0>
5,CN=C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7c80>
6,C1=CCC=CC1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7970>
7,C1=CC=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7cf0>
8,C1=C(C)C=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7740>
9,C1CCC2CCCCC2C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf2040>


In [23]:
# Record, for each molecule, the atom count reported by GetNumAtoms().
esol_data["n_Atoms"] = [mol.GetNumAtoms() for mol in esol_data['ROMol']]
esol_data.head(n=10)

Unnamed: 0,smiles,ROMol,n_Atoms
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7900>,2
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7890>,2
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7660>,2
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x150bcedf70b0>,3
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf79e0>,5
5,CN=C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7c80>,4
6,C1=CCC=CC1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7970>,6
7,C1=CC=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7cf0>,6
8,C1=C(C)C=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7740>,7
9,C1CCC2CCCCC2C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf2040>,10


In [24]:
# Show the SMILES string stored under row label 0.
print(esol_data.loc[0, 'smiles'])

C=C


In [26]:
energy=[]
pk=Psikit(debug=True,threads=1,memory=12)
for i in range(10):
    %time
    pk.read_from_smiles(esol_data['smiles'][i])
    energy.append(pk.optimize())
    


  Memory set to  11.176 GiB by Python driver.
  Threads set to 1 by Python driver.
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.05 µs
Optimizer: Optimization complete!
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.48 µs
Optimizer: Optimization complete!
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
Optimizer: Optimization complete!
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
Optimizer: Optimization complete!
CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.58 µs
Optimizer: Optimization complete!
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.39 µs
Optimizer: Optimization complete!
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.72 µs
Optimizer: Optimization complete!
CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 8.34 µs
Optimizer: Optimization complete!
CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 5.72 µs
Optimizer: Optimization complete!
CPU times: user 2 µs, sys:

In [27]:
# Dump the raw list of values collected from pk.optimize(), one per molecule
# processed, at full floating-point precision.
print(energy)

[-78.03876941254534, -113.86953646696564, -92.87694247778018, -154.0899187072665, -266.85898515762165, -206.79559264403252, -231.84676156654098, -230.71352354231627, -269.75355216648376, -389.2048422472965]


In [28]:
# Attach the collected optimization results as a new 'energy' column.
# NOTE(review): assumes len(energy) equals the number of rows in esol_data
# (ten molecules were optimized) — confirm if smiles.csv grows.
esol_data['energy']=energy

In [29]:
# Preview the first ten rows, now including the 'energy' column.
esol_data.head(10)

Unnamed: 0,smiles,ROMol,n_Atoms,energy
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7900>,2,-78.038769
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7890>,2,-113.869536
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7660>,2,-92.876942
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x150bcedf70b0>,3,-154.089919
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf79e0>,5,-266.858985
5,CN=C=O,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7c80>,4,-206.795593
6,C1=CCC=CC1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7970>,6,-231.846762
7,C1=CC=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7cf0>,6,-230.713524
8,C1=C(C)C=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf7740>,7,-269.753552
9,C1CCC2CCCCC2C1,<rdkit.Chem.rdchem.Mol object at 0x150bcedf2040>,10,-389.204842
