In [1]:
import numpy as np
import qcportal as ptl
import pickle
import os
import cmiles
from openeye import oechem, oequacpac

In [2]:
client = ptl.FractalClient()  
ds = client.get_collection("TorsionDriveDataset", "OpenFF Gen 2 Torsion Set 1 Roche 2")
ds.status(["default"], collapse=False, status="COMPLETE")

Unnamed: 0,default
c1cn[c:1]([nH]1)[CH2:2][C@H:3]2CCCC[C@@H:4]2O,COMPLETE
c1cc[c:1](cc1)[CH2:2][CH2:3][F:4],COMPLETE
[CH3:4][CH2:3][C:2]1([CH2:1]OC1)C,COMPLETE
[H:1][CH2:2][C@:3]([H:4])(c1ccccc1)C(=O)N(C)C,COMPLETE
[H:1][CH2:2][C:3]([H:4])(c1ccccc1)c2ccccc2,COMPLETE
...,...
[CH3:1][N@@:2](c1ccccc1)[S:3](=O)(=O)[CH3:4],COMPLETE
[CH3:4][NH:3][S:2](=O)(=O)[c:1]1ccccc1,COMPLETE
c1cc[c:1](cc1)[S:2](=O)(=O)[N:3]2[CH2:4]CCC2,COMPLETE
C[N:2]([CH3:1])[S:3](=O)(=[O:4])c1ccccn1,COMPLETE


In [3]:
client = ptl.FractalClient()
dataset_names = [ dataidx[1] for dataidx in client.list_collections("TorsionDriveDataset").index ]
len(dataset_names)

42

# Picking a torsiondrive record (first item)

In [4]:
import qcelemental as qcel
import qcengine as qcng

def get_xtb_wbo_qce(qcmol, idx1, idx2):
    
    # xtb model
    model = qcel.models.AtomicInput(
        molecule=qcmol,
        driver="energy",
        model={"method": "GFN2-xTB"},    
    )

    # result of single point energy calculation
    result = qcng.compute(model, "xtb")

    return result.extras["xtb"]["mayer_indices"][idx1, idx2]

In [5]:
def get_wbo_from_dataset(client, ds_name):

    all_data = []
    f = 0
    logged = False
    ds = client.get_collection("TorsionDriveDataset", ds_name)
    print(f"starting {ds_name}")
    print(f"molecules: {ds.status('default',status='COMPLETE').default[0]}")
    for i, index in enumerate(ds.df.index):

        # get the record of each entry
        tdr = ds.get_record(name=index, specification='default')
        
        if f % 10 == 0 and logged == False:
            logged = True
            print(f"    {f}")

        if tdr.status == "COMPLETE":
            f += 1
            logged = False
            try:
                if len(tdr.final_energy_dict) == 0:
                    print(f"Molecule had no final energy dict {index}")
                    continue
                min_idx = min(tdr.final_energy_dict, key=tdr.final_energy_dict.get)
                record = tdr.get_history(min_idx, minimum=True)
                # get optimized molecule of the record
                qc_mol = record.get_final_molecule()

                # convert the qcelemental molecule to an OpenEye molecule
                qcjson_mol = qc_mol.dict(encoding='json')
                oemol = cmiles.utils.load_molecule(qcjson_mol)


                dihedrals = tdr.keywords.dihedrals[0]
                natoms = len(record.get_final_molecule().symbols)
                result = record.get_trajectory()[-1]
                
                wiberg = np.array(result.extras["qcvars"]["WIBERG_LOWDIN_INDICES"]).reshape(-1,natoms)
                qmwbo = wiberg[dihedrals[1], dihedrals[2]]



                # make a copy for am1 computations
                xtbwbo = get_xtb_wbo_qce(qc_mol, dihedrals[1], dihedrals[2])

                #                                      v  this makes different conformers have different dict entries.
                #                                           as far as I know, this just causes a warning in visuals
                smiles = oechem.OEMolToSmiles(oemol) + f"_{i}"


                this_molecule_data = ( smiles, ((qmwbo, xtbwbo), dihedrals) )
                all_data.append(this_molecule_data)
            except:
                print(f"ERROR {ds_name} {i}")
                continue
    print(f"finished {ds_name}")
    return all_data

    

In [6]:
pkldir = "xtb_benchmark_results"

if not os.path.exists(pkldir):
    os.makedirs(pkldir)
    
for dname in dataset_names[25:]:
    wbo = get_wbo_from_dataset(client, dname)
    fname = f"{pkldir}/{dname.replace(' ','')}.pkl"
    to_dump = [dname, wbo]
    with open(fname, "wb") as pkf:
        pickle.dump( to_dump, pkf )

starting OpenFF Protein Fragments TorsionDrives v1.0
molecules: 825
    0
    10
Molecule had no final energy dict gly_gln_ser-omega
    20
    30
    40
    50
    60
    70
    80
Molecule had no final energy dict ala_glh_gly-omega
Molecule had no final energy dict gly_glh_val-omega
    90
    100
    110
    120
    130
    140
    150
Molecule had no final energy dict val_ala_gly-omega
    160
Molecule had no final energy dict val_ala_ala-omega
    170
Molecule had no final energy dict ser_ala_ala-omega
    180
Molecule had no final energy dict ala_ala_gly-omega
Molecule had no final energy dict gly_ala_ala-omega
    190
Molecule had no final energy dict val_ala_val-omega
    200
Molecule had no final energy dict ala_ala_ala-omega
    210
Molecule had no final energy dict ala_hie_val-omega
    220
    230
    240
    250
    260
Molecule had no final energy dict ala_hie_gly-omega
    270
    280
    290
    300
    310
    320
    330
Molecule had no final energy dict gly_asn_gly-o

ERROR SiliconTX Torsion Benchmark Set 1 65
    50
ERROR SiliconTX Torsion Benchmark Set 1 66
ERROR SiliconTX Torsion Benchmark Set 1 67
ERROR SiliconTX Torsion Benchmark Set 1 68
ERROR SiliconTX Torsion Benchmark Set 1 69
ERROR SiliconTX Torsion Benchmark Set 1 70
ERROR SiliconTX Torsion Benchmark Set 1 71
ERROR SiliconTX Torsion Benchmark Set 1 72
ERROR SiliconTX Torsion Benchmark Set 1 73
ERROR SiliconTX Torsion Benchmark Set 1 74
ERROR SiliconTX Torsion Benchmark Set 1 75
    60
ERROR SiliconTX Torsion Benchmark Set 1 76
ERROR SiliconTX Torsion Benchmark Set 1 77
ERROR SiliconTX Torsion Benchmark Set 1 78
ERROR SiliconTX Torsion Benchmark Set 1 79
ERROR SiliconTX Torsion Benchmark Set 1 80
ERROR SiliconTX Torsion Benchmark Set 1 81
ERROR SiliconTX Torsion Benchmark Set 1 82
ERROR SiliconTX Torsion Benchmark Set 1 83
ERROR SiliconTX Torsion Benchmark Set 1 84
ERROR SiliconTX Torsion Benchmark Set 1 85
    70
ERROR SiliconTX Torsion Benchmark Set 1 86
ERROR SiliconTX Torsion Benchmark

ERROR SiliconTX Torsion Benchmark Set 1 273
ERROR SiliconTX Torsion Benchmark Set 1 274
ERROR SiliconTX Torsion Benchmark Set 1 275
ERROR SiliconTX Torsion Benchmark Set 1 276
ERROR SiliconTX Torsion Benchmark Set 1 277
ERROR SiliconTX Torsion Benchmark Set 1 278
ERROR SiliconTX Torsion Benchmark Set 1 280
    240
ERROR SiliconTX Torsion Benchmark Set 1 281
ERROR SiliconTX Torsion Benchmark Set 1 282
ERROR SiliconTX Torsion Benchmark Set 1 283
ERROR SiliconTX Torsion Benchmark Set 1 284
ERROR SiliconTX Torsion Benchmark Set 1 285
ERROR SiliconTX Torsion Benchmark Set 1 286
ERROR SiliconTX Torsion Benchmark Set 1 287
ERROR SiliconTX Torsion Benchmark Set 1 288
ERROR SiliconTX Torsion Benchmark Set 1 289
ERROR SiliconTX Torsion Benchmark Set 1 292
    250
ERROR SiliconTX Torsion Benchmark Set 1 293
ERROR SiliconTX Torsion Benchmark Set 1 294
ERROR SiliconTX Torsion Benchmark Set 1 295
ERROR SiliconTX Torsion Benchmark Set 1 296
ERROR SiliconTX Torsion Benchmark Set 1 297
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 492
ERROR SiliconTX Torsion Benchmark Set 1 493
ERROR SiliconTX Torsion Benchmark Set 1 494
ERROR SiliconTX Torsion Benchmark Set 1 495
    420
ERROR SiliconTX Torsion Benchmark Set 1 496
ERROR SiliconTX Torsion Benchmark Set 1 498
ERROR SiliconTX Torsion Benchmark Set 1 499
ERROR SiliconTX Torsion Benchmark Set 1 504
ERROR SiliconTX Torsion Benchmark Set 1 510
ERROR SiliconTX Torsion Benchmark Set 1 511
ERROR SiliconTX Torsion Benchmark Set 1 512
ERROR SiliconTX Torsion Benchmark Set 1 513
ERROR SiliconTX Torsion Benchmark Set 1 514
ERROR SiliconTX Torsion Benchmark Set 1 515
    430
ERROR SiliconTX Torsion Benchmark Set 1 517
ERROR SiliconTX Torsion Benchmark Set 1 518
ERROR SiliconTX Torsion Benchmark Set 1 519
ERROR SiliconTX Torsion Benchmark Set 1 520
ERROR SiliconTX Torsion Benchmark Set 1 521
ERROR SiliconTX Torsion Benchmark Set 1 522
ERROR SiliconTX Torsion Benchmark Set 1 524
ERROR SiliconTX Torsion Benchmark Set 1 525
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 725
    600
ERROR SiliconTX Torsion Benchmark Set 1 726
ERROR SiliconTX Torsion Benchmark Set 1 727
ERROR SiliconTX Torsion Benchmark Set 1 728
ERROR SiliconTX Torsion Benchmark Set 1 730
ERROR SiliconTX Torsion Benchmark Set 1 731
ERROR SiliconTX Torsion Benchmark Set 1 732
ERROR SiliconTX Torsion Benchmark Set 1 734
ERROR SiliconTX Torsion Benchmark Set 1 735
ERROR SiliconTX Torsion Benchmark Set 1 736
ERROR SiliconTX Torsion Benchmark Set 1 737
    610
ERROR SiliconTX Torsion Benchmark Set 1 738
ERROR SiliconTX Torsion Benchmark Set 1 739
ERROR SiliconTX Torsion Benchmark Set 1 740
ERROR SiliconTX Torsion Benchmark Set 1 741
ERROR SiliconTX Torsion Benchmark Set 1 742
ERROR SiliconTX Torsion Benchmark Set 1 743
ERROR SiliconTX Torsion Benchmark Set 1 745
ERROR SiliconTX Torsion Benchmark Set 1 746
ERROR SiliconTX Torsion Benchmark Set 1 747
ERROR SiliconTX Torsion Benchmark Set 1 748
    620
ERROR SiliconTX Torsion Benchmark Set 1 749
ERROR Si

ERROR SiliconTX Torsion Benchmark Set 1 930
ERROR SiliconTX Torsion Benchmark Set 1 931
ERROR SiliconTX Torsion Benchmark Set 1 936
ERROR SiliconTX Torsion Benchmark Set 1 939
ERROR SiliconTX Torsion Benchmark Set 1 940
ERROR SiliconTX Torsion Benchmark Set 1 941
ERROR SiliconTX Torsion Benchmark Set 1 942
ERROR SiliconTX Torsion Benchmark Set 1 943
    790
ERROR SiliconTX Torsion Benchmark Set 1 944
ERROR SiliconTX Torsion Benchmark Set 1 945
ERROR SiliconTX Torsion Benchmark Set 1 946
ERROR SiliconTX Torsion Benchmark Set 1 947
ERROR SiliconTX Torsion Benchmark Set 1 948
ERROR SiliconTX Torsion Benchmark Set 1 949
ERROR SiliconTX Torsion Benchmark Set 1 950
ERROR SiliconTX Torsion Benchmark Set 1 952
ERROR SiliconTX Torsion Benchmark Set 1 953
ERROR SiliconTX Torsion Benchmark Set 1 957
    800
ERROR SiliconTX Torsion Benchmark Set 1 959
ERROR SiliconTX Torsion Benchmark Set 1 961
ERROR SiliconTX Torsion Benchmark Set 1 962
ERROR SiliconTX Torsion Benchmark Set 1 963
ERROR SiliconTX 

ERROR SiliconTX Torsion Benchmark Set 1 1180
ERROR SiliconTX Torsion Benchmark Set 1 1181
ERROR SiliconTX Torsion Benchmark Set 1 1182
ERROR SiliconTX Torsion Benchmark Set 1 1183
ERROR SiliconTX Torsion Benchmark Set 1 1184
ERROR SiliconTX Torsion Benchmark Set 1 1185
ERROR SiliconTX Torsion Benchmark Set 1 1188
ERROR SiliconTX Torsion Benchmark Set 1 1189
    970
ERROR SiliconTX Torsion Benchmark Set 1 1190
ERROR SiliconTX Torsion Benchmark Set 1 1191
ERROR SiliconTX Torsion Benchmark Set 1 1192
ERROR SiliconTX Torsion Benchmark Set 1 1193
ERROR SiliconTX Torsion Benchmark Set 1 1194
ERROR SiliconTX Torsion Benchmark Set 1 1195
ERROR SiliconTX Torsion Benchmark Set 1 1196
ERROR SiliconTX Torsion Benchmark Set 1 1197
ERROR SiliconTX Torsion Benchmark Set 1 1198
ERROR SiliconTX Torsion Benchmark Set 1 1199
    980
ERROR SiliconTX Torsion Benchmark Set 1 1200
ERROR SiliconTX Torsion Benchmark Set 1 1201
ERROR SiliconTX Torsion Benchmark Set 1 1202
ERROR SiliconTX Torsion Benchmark Set 1

ERROR SiliconTX Torsion Benchmark Set 1 1382
ERROR SiliconTX Torsion Benchmark Set 1 1384
ERROR SiliconTX Torsion Benchmark Set 1 1386
ERROR SiliconTX Torsion Benchmark Set 1 1387
ERROR SiliconTX Torsion Benchmark Set 1 1389
ERROR SiliconTX Torsion Benchmark Set 1 1390
ERROR SiliconTX Torsion Benchmark Set 1 1392
ERROR SiliconTX Torsion Benchmark Set 1 1394
ERROR SiliconTX Torsion Benchmark Set 1 1395
    1150
ERROR SiliconTX Torsion Benchmark Set 1 1396
ERROR SiliconTX Torsion Benchmark Set 1 1397
ERROR SiliconTX Torsion Benchmark Set 1 1398
ERROR SiliconTX Torsion Benchmark Set 1 1399
ERROR SiliconTX Torsion Benchmark Set 1 1400
ERROR SiliconTX Torsion Benchmark Set 1 1401
ERROR SiliconTX Torsion Benchmark Set 1 1402
ERROR SiliconTX Torsion Benchmark Set 1 1403
ERROR SiliconTX Torsion Benchmark Set 1 1404
ERROR SiliconTX Torsion Benchmark Set 1 1405
    1160
ERROR SiliconTX Torsion Benchmark Set 1 1406
ERROR SiliconTX Torsion Benchmark Set 1 1407
ERROR SiliconTX Torsion Benchmark Set

In [None]:
dataset_names.index("OpenFF Protein Fragments TorsionDrives v1.0")