In [13]:
import numpy as np
import qcportal as ptl
import pickle
import os
import cmiles
from openeye import oechem, oequacpac

In [2]:
# Create a FractalClient with default settings and fetch one torsion-drive
# dataset so its completion status can be inspected.
client = ptl.FractalClient()
collection_kind = "TorsionDriveDataset"
roche_set_name = "OpenFF Gen 2 Torsion Set 1 Roche 2"
ds = client.get_collection(collection_kind, roche_set_name)

# Last expression of the cell: the per-entry status table for the "default"
# specification, restricted to COMPLETE records.
ds.status(["default"], collapse=False, status="COMPLETE")

Unnamed: 0,default
c1cn[c:1]([nH]1)[CH2:2][C@H:3]2CCCC[C@@H:4]2O,COMPLETE
c1cc[c:1](cc1)[CH2:2][CH2:3][F:4],COMPLETE
[CH3:4][CH2:3][C:2]1([CH2:1]OC1)C,COMPLETE
[H:1][CH2:2][C@:3]([H:4])(c1ccccc1)C(=O)N(C)C,COMPLETE
[H:1][CH2:2][C:3]([H:4])(c1ccccc1)c2ccccc2,COMPLETE
...,...
[CH3:1][N@@:2](c1ccccc1)[S:3](=O)(=O)[CH3:4],COMPLETE
[CH3:4][NH:3][S:2](=O)(=O)[c:1]1ccccc1,COMPLETE
c1cc[c:1](cc1)[S:2](=O)(=O)[N:3]2[CH2:4]CCC2,COMPLETE
C[N:2]([CH3:1])[S:3](=O)(=[O:4])c1ccccn1,COMPLETE


In [3]:
# Display the TorsionDriveDataset collections visible to the client
# (the rendered table is indexed by collection type and name, with a tagline column).
client.list_collections("TorsionDriveDataset")

Unnamed: 0_level_0,Unnamed: 1_level_0,tagline
collection,name,Unnamed: 2_level_1
TorsionDriveDataset,Fragment Stability Benchmark,
TorsionDriveDataset,Fragmenter paper,
TorsionDriveDataset,OpenFF Amide Torsion Set v1.0,"Amides, thioamides and amidines diversely func..."
TorsionDriveDataset,OpenFF DANCE 1 eMolecules t142 v1.0,
TorsionDriveDataset,OpenFF Fragmenter Validation 1.0,
TorsionDriveDataset,OpenFF Full TorsionDrive Benchmark 1,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 1 Roche,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 1 Roche 2,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 2 Coverage,
TorsionDriveDataset,OpenFF Gen 2 Torsion Set 2 Coverage 2,


In [4]:
# Gather the name of every TorsionDriveDataset collection the client lists.
client = ptl.FractalClient()
torsion_collections = client.list_collections("TorsionDriveDataset")

# Each index entry is a (collection, name) pair; keep only the name.
dataset_names = [name for _collection, name in torsion_collections.index]

# Last expression of the cell: how many datasets were found.
len(dataset_names)

42

# Picking the lowest-energy optimization of each torsiondrive record

In [65]:
def get_wbo_from_dataset(client, ds_name):
    """Collect QM and AM1 Wiberg bond orders for the torsion drives of one dataset.

    Every entry of the ``TorsionDriveDataset`` called ``ds_name`` is visited.
    For each entry whose ``default`` record is COMPLETE, the grid point with
    the lowest final energy is selected and two bond orders are read for the
    central bond of the driven dihedral (atoms ``dihedrals[1]`` and
    ``dihedrals[2]``):

    * the QM value, taken from the ``WIBERG_LOWDIN_INDICES`` entry of the
      ``qcvars`` stored on the last result of that optimization's trajectory;
    * the AM1 value, computed with OpenEye on the molecule built from the
      optimization's final geometry.

    Parameters
    ----------
    client
        Object exposing ``get_collection(collection_type, name)``; the
        notebook passes a ``ptl.FractalClient``.
    ds_name : str
        Name of the torsion-drive dataset to process.

    Returns
    -------
    list
        One ``(smiles, ((qmwbo, am1wbo), dihedrals))`` tuple per record that
        was processed successfully. ``smiles`` carries a ``_{i}`` suffix
        (``i`` is the entry's position in the dataset) so that entries sharing
        a SMILES string keep distinct labels.

    Notes
    -----
    A record that cannot be processed is reported on stdout together with the
    exception that caused it and is then skipped. Only ``Exception``
    subclasses are handled this way, so ``KeyboardInterrupt`` still stops a
    long-running download.
    """
    all_data = []
    n_complete = 0
    logged = False
    ds = client.get_collection("TorsionDriveDataset", ds_name)
    print(f"starting {ds_name}")
    print(f"molecules: {ds.status('default',status='COMPLETE').default[0]}")
    for i, index in enumerate(ds.df.index):

        # get the record of each entry
        tdr = ds.get_record(name=index, specification='default')

        # Progress marker: printed once each time the number of COMPLETE
        # records seen so far is a multiple of ten.
        if n_complete % 10 == 0 and not logged:
            logged = True
            print(f"    {n_complete}")

        if tdr.status != "COMPLETE":
            continue

        n_complete += 1
        logged = False
        try:
            if len(tdr.final_energy_dict) == 0:
                print(f"Molecule had no final energy dict {index}")
                continue

            # Grid point with the lowest final energy, and its best optimization.
            min_idx = min(tdr.final_energy_dict, key=tdr.final_energy_dict.get)
            record = tdr.get_history(min_idx, minimum=True)

            # get optimized molecule of the record
            qc_mol = record.get_final_molecule()

            # convert the qcelemental molecule to an OpenEye molecule
            qcjson_mol = qc_mol.dict(encoding='json')
            oemol = cmiles.utils.load_molecule(qcjson_mol)

            dihedrals = tdr.keywords.dihedrals[0]

            # Reuse the molecule fetched above instead of requesting it again.
            natoms = len(qc_mol.symbols)
            result = record.get_trajectory()[-1]

            # The stored indices are reshaped into rows of `natoms` entries so
            # the bond order can be looked up by the two central atom indices.
            wiberg = np.array(result.extras["qcvars"]["WIBERG_LOWDIN_INDICES"]).reshape(-1, natoms)
            qmwbo = wiberg[dihedrals[1], dihedrals[2]]

            # make a copy for am1 computations
            calcmol1 = oechem.OEMol(oemol)

            calc = oequacpac.OEAM1()
            am1result = oequacpac.OEAM1Results()
            success = calc.CalcAM1(am1result, calcmol1)
            if not success:
                # Do not record a bond order read from a failed calculation.
                raise RuntimeError("OpenEye AM1 calculation failed")

            am1wbo = am1result.GetBondOrder(dihedrals[1], dihedrals[2])

            # The "_{i}" suffix makes different conformers have different
            # dict entries; as far as I know, this just causes a warning in
            # visuals.
            smiles = oechem.OEMolToSmiles(oemol) + f"_{i}"

            this_molecule_data = (smiles, ((qmwbo, am1wbo), dihedrals))
            all_data.append(this_molecule_data)
        except Exception as exc:
            # Best effort: report why this record failed and move on.
            print(f"ERROR {ds_name} {i}: {type(exc).__name__}: {exc}")
            continue
    print(f"finished {ds_name}")
    return all_data

    

In [66]:
# Write one pickle per dataset into `pkldir`; each file holds the list
# [dataset name, WBO records returned by get_wbo_from_dataset].
pkldir = "benchmark_results"

# exist_ok=True replaces the separate existence check, so re-running the cell
# is safe and there is no gap between checking and creating the directory.
os.makedirs(pkldir, exist_ok=True)

# Only the datasets from position 19 onwards are processed here.
# NOTE(review): presumably the earlier ones were handled in a previous run - confirm.
for dname in dataset_names[19:]:
    wbo = get_wbo_from_dataset(client, dname)
    # File name is the dataset name with its spaces removed.
    fname = os.path.join(pkldir, f"{dname.replace(' ', '')}.pkl")
    to_dump = [dname, wbo]
    with open(fname, "wb") as pkf:
        pickle.dump(to_dump, pkf)

starting OpenFF Group1 Torsions
molecules: 816
    0
ERROR OpenFF Group1 Torsions 0
ERROR OpenFF Group1 Torsions 1
ERROR OpenFF Group1 Torsions 2
ERROR OpenFF Group1 Torsions 3
ERROR OpenFF Group1 Torsions 4
ERROR OpenFF Group1 Torsions 5
ERROR OpenFF Group1 Torsions 6
ERROR OpenFF Group1 Torsions 7
ERROR OpenFF Group1 Torsions 8
ERROR OpenFF Group1 Torsions 9
    10
ERROR OpenFF Group1 Torsions 10
ERROR OpenFF Group1 Torsions 11
ERROR OpenFF Group1 Torsions 12
ERROR OpenFF Group1 Torsions 13
ERROR OpenFF Group1 Torsions 14
ERROR OpenFF Group1 Torsions 15
ERROR OpenFF Group1 Torsions 16
ERROR OpenFF Group1 Torsions 17
ERROR OpenFF Group1 Torsions 18
ERROR OpenFF Group1 Torsions 19
    20
ERROR OpenFF Group1 Torsions 20
ERROR OpenFF Group1 Torsions 21
ERROR OpenFF Group1 Torsions 22
ERROR OpenFF Group1 Torsions 23
ERROR OpenFF Group1 Torsions 24
ERROR OpenFF Group1 Torsions 25
ERROR OpenFF Group1 Torsions 26
ERROR OpenFF Group1 Torsions 27
ERROR OpenFF Group1 Torsions 28
ERROR OpenFF Gr

ERROR OpenFF Group1 Torsions 245
ERROR OpenFF Group1 Torsions 246
ERROR OpenFF Group1 Torsions 247
ERROR OpenFF Group1 Torsions 248
ERROR OpenFF Group1 Torsions 249
    250
ERROR OpenFF Group1 Torsions 250
ERROR OpenFF Group1 Torsions 251
ERROR OpenFF Group1 Torsions 252
ERROR OpenFF Group1 Torsions 253
ERROR OpenFF Group1 Torsions 254
ERROR OpenFF Group1 Torsions 255
ERROR OpenFF Group1 Torsions 256
ERROR OpenFF Group1 Torsions 257
ERROR OpenFF Group1 Torsions 258
ERROR OpenFF Group1 Torsions 259
    260
ERROR OpenFF Group1 Torsions 260
ERROR OpenFF Group1 Torsions 261
ERROR OpenFF Group1 Torsions 263
ERROR OpenFF Group1 Torsions 264
ERROR OpenFF Group1 Torsions 265
ERROR OpenFF Group1 Torsions 266
ERROR OpenFF Group1 Torsions 267
ERROR OpenFF Group1 Torsions 268
ERROR OpenFF Group1 Torsions 269
ERROR OpenFF Group1 Torsions 270
    270
ERROR OpenFF Group1 Torsions 271
ERROR OpenFF Group1 Torsions 272
ERROR OpenFF Group1 Torsions 273
ERROR OpenFF Group1 Torsions 274
ERROR OpenFF Group1

ERROR OpenFF Group1 Torsions 490
ERROR OpenFF Group1 Torsions 491
    490
ERROR OpenFF Group1 Torsions 492
ERROR OpenFF Group1 Torsions 493
ERROR OpenFF Group1 Torsions 494
ERROR OpenFF Group1 Torsions 495
ERROR OpenFF Group1 Torsions 496
ERROR OpenFF Group1 Torsions 497
ERROR OpenFF Group1 Torsions 498
ERROR OpenFF Group1 Torsions 499
ERROR OpenFF Group1 Torsions 500
ERROR OpenFF Group1 Torsions 501
    500
ERROR OpenFF Group1 Torsions 502
ERROR OpenFF Group1 Torsions 503
ERROR OpenFF Group1 Torsions 504
ERROR OpenFF Group1 Torsions 505
ERROR OpenFF Group1 Torsions 506
ERROR OpenFF Group1 Torsions 507
ERROR OpenFF Group1 Torsions 508
ERROR OpenFF Group1 Torsions 509
ERROR OpenFF Group1 Torsions 510
ERROR OpenFF Group1 Torsions 511
    510
ERROR OpenFF Group1 Torsions 512
ERROR OpenFF Group1 Torsions 513
ERROR OpenFF Group1 Torsions 514
ERROR OpenFF Group1 Torsions 515
ERROR OpenFF Group1 Torsions 516
ERROR OpenFF Group1 Torsions 517
ERROR OpenFF Group1 Torsions 518
ERROR OpenFF Group1

ERROR OpenFF Group1 Torsions 735
ERROR OpenFF Group1 Torsions 736
ERROR OpenFF Group1 Torsions 737
ERROR OpenFF Group1 Torsions 738
ERROR OpenFF Group1 Torsions 739
ERROR OpenFF Group1 Torsions 740
ERROR OpenFF Group1 Torsions 741
ERROR OpenFF Group1 Torsions 742
ERROR OpenFF Group1 Torsions 743
    740
ERROR OpenFF Group1 Torsions 744
ERROR OpenFF Group1 Torsions 745
ERROR OpenFF Group1 Torsions 746
ERROR OpenFF Group1 Torsions 747
ERROR OpenFF Group1 Torsions 748
ERROR OpenFF Group1 Torsions 749
ERROR OpenFF Group1 Torsions 750
ERROR OpenFF Group1 Torsions 751
ERROR OpenFF Group1 Torsions 752
ERROR OpenFF Group1 Torsions 753
    750
ERROR OpenFF Group1 Torsions 754
ERROR OpenFF Group1 Torsions 755
ERROR OpenFF Group1 Torsions 756
ERROR OpenFF Group1 Torsions 757
ERROR OpenFF Group1 Torsions 758
ERROR OpenFF Group1 Torsions 759
ERROR OpenFF Group1 Torsions 760
ERROR OpenFF Group1 Torsions 761
ERROR OpenFF Group1 Torsions 762
ERROR OpenFF Group1 Torsions 763
    760
ERROR OpenFF Group1

    20
    30
    40
    50
    60
    70
    80
    90
    100
    110
    120
    130
    140
    150
    160
    170
    180
    190
    200
    210
    220
    230
    240
    250
    260
    270
    280
    290
    300
    310
    320
    330
    340
    350
    360
    370
    380
    390
    400
    410
    420
    430
    440
    450
    460
    470
    480
    490
    500
    510
    520
    530
    540
    550
    560
    570
    580
    590
    600
    610
    620
    630
    640
    650
    660
    670
    680
    690
    700
    710
finished OpenFF WBO Conjugated Series v1.0
starting OpenFF-benchmark-ligand-fragments-v1.0
molecules: 481
    0
    10
    20
    30
    40
    50
    60
    70
    80
    90
    100
    110
    120
    130
    140
    150
    160
    170
    180
    190
    200
    210
    220
    230
    240
    250
    260
    270
    280
    290
    300
    310
    320
    330
    340
    350
    360
    370
    380
    390
    400
    410
    420
    430
 

ERROR SiliconTX Torsion Benchmark Set 1 176
ERROR SiliconTX Torsion Benchmark Set 1 177
ERROR SiliconTX Torsion Benchmark Set 1 178
ERROR SiliconTX Torsion Benchmark Set 1 179
ERROR SiliconTX Torsion Benchmark Set 1 180
ERROR SiliconTX Torsion Benchmark Set 1 181
ERROR SiliconTX Torsion Benchmark Set 1 182
    150
ERROR SiliconTX Torsion Benchmark Set 1 183
ERROR SiliconTX Torsion Benchmark Set 1 184
ERROR SiliconTX Torsion Benchmark Set 1 185
ERROR SiliconTX Torsion Benchmark Set 1 186
ERROR SiliconTX Torsion Benchmark Set 1 187
ERROR SiliconTX Torsion Benchmark Set 1 188
ERROR SiliconTX Torsion Benchmark Set 1 189
ERROR SiliconTX Torsion Benchmark Set 1 190
ERROR SiliconTX Torsion Benchmark Set 1 191
ERROR SiliconTX Torsion Benchmark Set 1 192
    160
ERROR SiliconTX Torsion Benchmark Set 1 193
ERROR SiliconTX Torsion Benchmark Set 1 194
ERROR SiliconTX Torsion Benchmark Set 1 195
ERROR SiliconTX Torsion Benchmark Set 1 196
ERROR SiliconTX Torsion Benchmark Set 1 197
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 378
ERROR SiliconTX Torsion Benchmark Set 1 379
ERROR SiliconTX Torsion Benchmark Set 1 381
ERROR SiliconTX Torsion Benchmark Set 1 382
    330
ERROR SiliconTX Torsion Benchmark Set 1 383
ERROR SiliconTX Torsion Benchmark Set 1 384
ERROR SiliconTX Torsion Benchmark Set 1 385
ERROR SiliconTX Torsion Benchmark Set 1 391
ERROR SiliconTX Torsion Benchmark Set 1 392
ERROR SiliconTX Torsion Benchmark Set 1 393
ERROR SiliconTX Torsion Benchmark Set 1 394
ERROR SiliconTX Torsion Benchmark Set 1 395
ERROR SiliconTX Torsion Benchmark Set 1 396
ERROR SiliconTX Torsion Benchmark Set 1 397
    340
ERROR SiliconTX Torsion Benchmark Set 1 398
ERROR SiliconTX Torsion Benchmark Set 1 399
ERROR SiliconTX Torsion Benchmark Set 1 400
ERROR SiliconTX Torsion Benchmark Set 1 401
ERROR SiliconTX Torsion Benchmark Set 1 402
ERROR SiliconTX Torsion Benchmark Set 1 403
ERROR SiliconTX Torsion Benchmark Set 1 404
ERROR SiliconTX Torsion Benchmark Set 1 406
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 599
    510
ERROR SiliconTX Torsion Benchmark Set 1 600
ERROR SiliconTX Torsion Benchmark Set 1 602
ERROR SiliconTX Torsion Benchmark Set 1 603
ERROR SiliconTX Torsion Benchmark Set 1 604
ERROR SiliconTX Torsion Benchmark Set 1 606
ERROR SiliconTX Torsion Benchmark Set 1 607
ERROR SiliconTX Torsion Benchmark Set 1 608
ERROR SiliconTX Torsion Benchmark Set 1 609
ERROR SiliconTX Torsion Benchmark Set 1 610
ERROR SiliconTX Torsion Benchmark Set 1 613
    520
ERROR SiliconTX Torsion Benchmark Set 1 616
ERROR SiliconTX Torsion Benchmark Set 1 617
ERROR SiliconTX Torsion Benchmark Set 1 619
ERROR SiliconTX Torsion Benchmark Set 1 623
ERROR SiliconTX Torsion Benchmark Set 1 624
ERROR SiliconTX Torsion Benchmark Set 1 629
ERROR SiliconTX Torsion Benchmark Set 1 631
ERROR SiliconTX Torsion Benchmark Set 1 632
ERROR SiliconTX Torsion Benchmark Set 1 633
ERROR SiliconTX Torsion Benchmark Set 1 634
    530
ERROR SiliconTX Torsion Benchmark Set 1 635
ERROR Si

ERROR SiliconTX Torsion Benchmark Set 1 834
ERROR SiliconTX Torsion Benchmark Set 1 835
ERROR SiliconTX Torsion Benchmark Set 1 836
ERROR SiliconTX Torsion Benchmark Set 1 837
ERROR SiliconTX Torsion Benchmark Set 1 838
ERROR SiliconTX Torsion Benchmark Set 1 842
ERROR SiliconTX Torsion Benchmark Set 1 843
ERROR SiliconTX Torsion Benchmark Set 1 844
    700
ERROR SiliconTX Torsion Benchmark Set 1 845
ERROR SiliconTX Torsion Benchmark Set 1 846
ERROR SiliconTX Torsion Benchmark Set 1 847
ERROR SiliconTX Torsion Benchmark Set 1 848
ERROR SiliconTX Torsion Benchmark Set 1 850
ERROR SiliconTX Torsion Benchmark Set 1 851
ERROR SiliconTX Torsion Benchmark Set 1 852
ERROR SiliconTX Torsion Benchmark Set 1 853
ERROR SiliconTX Torsion Benchmark Set 1 854
ERROR SiliconTX Torsion Benchmark Set 1 855
    710
ERROR SiliconTX Torsion Benchmark Set 1 856
ERROR SiliconTX Torsion Benchmark Set 1 857
ERROR SiliconTX Torsion Benchmark Set 1 858
ERROR SiliconTX Torsion Benchmark Set 1 859
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 1073
ERROR SiliconTX Torsion Benchmark Set 1 1074
ERROR SiliconTX Torsion Benchmark Set 1 1075
ERROR SiliconTX Torsion Benchmark Set 1 1076
ERROR SiliconTX Torsion Benchmark Set 1 1077
ERROR SiliconTX Torsion Benchmark Set 1 1078
    880
ERROR SiliconTX Torsion Benchmark Set 1 1082
ERROR SiliconTX Torsion Benchmark Set 1 1083
ERROR SiliconTX Torsion Benchmark Set 1 1084
ERROR SiliconTX Torsion Benchmark Set 1 1085
ERROR SiliconTX Torsion Benchmark Set 1 1086
ERROR SiliconTX Torsion Benchmark Set 1 1088
ERROR SiliconTX Torsion Benchmark Set 1 1089
ERROR SiliconTX Torsion Benchmark Set 1 1093
ERROR SiliconTX Torsion Benchmark Set 1 1094
ERROR SiliconTX Torsion Benchmark Set 1 1095
    890
ERROR SiliconTX Torsion Benchmark Set 1 1096
ERROR SiliconTX Torsion Benchmark Set 1 1097
ERROR SiliconTX Torsion Benchmark Set 1 1098
ERROR SiliconTX Torsion Benchmark Set 1 1099
ERROR SiliconTX Torsion Benchmark Set 1 1100
ERROR SiliconTX Torsion Benchmark Set 1

ERROR SiliconTX Torsion Benchmark Set 1 1281
ERROR SiliconTX Torsion Benchmark Set 1 1282
ERROR SiliconTX Torsion Benchmark Set 1 1283
ERROR SiliconTX Torsion Benchmark Set 1 1284
ERROR SiliconTX Torsion Benchmark Set 1 1285
ERROR SiliconTX Torsion Benchmark Set 1 1286
ERROR SiliconTX Torsion Benchmark Set 1 1288
    1060
ERROR SiliconTX Torsion Benchmark Set 1 1289
ERROR SiliconTX Torsion Benchmark Set 1 1290
ERROR SiliconTX Torsion Benchmark Set 1 1291
ERROR SiliconTX Torsion Benchmark Set 1 1292
ERROR SiliconTX Torsion Benchmark Set 1 1293
ERROR SiliconTX Torsion Benchmark Set 1 1294
ERROR SiliconTX Torsion Benchmark Set 1 1295
ERROR SiliconTX Torsion Benchmark Set 1 1297
ERROR SiliconTX Torsion Benchmark Set 1 1298
ERROR SiliconTX Torsion Benchmark Set 1 1299
    1070
ERROR SiliconTX Torsion Benchmark Set 1 1300
ERROR SiliconTX Torsion Benchmark Set 1 1301
ERROR SiliconTX Torsion Benchmark Set 1 1302
ERROR SiliconTX Torsion Benchmark Set 1 1303
ERROR SiliconTX Torsion Benchmark Set

In [60]:
# Diagnostic cell: replay, for a single entry, the first steps that
# get_wbo_from_dataset performs on every record.
# In the recorded run this cell printed the entry name and 24, then ended with
# "IndexError: list index out of range".
ds = client.get_collection("TorsionDriveDataset", "SMIRNOFF Coverage Torsion Set 1")

# Entry at position 292 of the dataset.
# NOTE(review): presumably one of the entries reported as ERROR - confirm.
index = ds.df.index[292]
print(index)
# get the record of this entry
tdr = ds.get_record(name=index, specification='default')
# Number of grid points that have a final energy.
print(len(tdr.final_energy_dict))
# Key of the grid point with the lowest final energy.
min_idx = min(tdr.final_energy_dict, key=tdr.final_energy_dict.get)
# NOTE(review): this is likely the call that raised the IndexError, since the
# preceding lines completed and printed their output - confirm with the traceback.
record = tdr.get_history(min_idx, minimum=True)

[H:4][O:3][S@@:2](=[O:1])c1cc(ccn1)Cl
24


IndexError: list index out of range

In [38]:
# Position of this dataset within dataset_names (39 in the recorded run).
dataset_names.index("SMIRNOFF Coverage Torsion Set 1")

39