Here we are prototyping the collection of the optimization dataset before filtering out molecules with connectivity changes and internal hydrogen bonds in the optimized molecule.

Then we prepare the molecules in a new dataset, which will have optimizations run from the optimized geometry using one of the MM force fields.

In [1]:
import qcportal as ptl
import numpy as np
from openforcefield.topology import Molecule
from qcsubmit.results import OptimizationCollectionResult
from qcsubmit.datasets import OptimizationDataset

client = ptl.FractalClient()


In [2]:
# # MM parameter fails
# ['[o-]s(=o)(=s)[o-]-0', 'c(=o)(o)[o-]-0']

# record = opt_ds.get_record('c(=o)(o)[o-]-0', 'default')

In [3]:
# record.get_final_molecule()

In [4]:
# pull the dataset and select the first 1000 records to pull down
opt_ds = client.get_collection('OptimizationDataset', 'OpenFF Full Optimization Benchmark 1')


In [5]:
# make a subset of the data
subset = list(opt_ds.data.records.keys())[:1000]


In [6]:
%time result = OptimizationCollectionResult.from_server(client=client, spec_name='default', dataset_name='OpenFF Full Optimization Benchmark 1', include_trajectory=False, final_molecule_only=True,subset=subset)
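# NOTE: stale comment from a 100-record trial (only 84 records were requested, meaning 16 of the first 100 had errors or were incomplete); the subset above is now the first 1000 records, see the output below for the current counts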
#result = OptimizationCollectionResult.from_server(client, 'default', 'OpenFF Full Optimization Benchmark 1')
#result = OptimizationCollectionResult.from_server(client, 'default', 'OpenFF Gen 2 Opt Set 2 Coverage')

requested molecules 930
requested results 930
CPU times: user 5.08 s, sys: 889 ms, total: 5.97 s
Wall time: 1min 40s


In [7]:
# Run both connectivity-change detectors on every record (the WBO variant is
# called with 0.65) and collect, per molecule, the keys on which they disagree.
differences = {}
for index, record in result.collection.items():
    wbo_changes = record.detect_connectivity_changes_wbo(0.65)
    rule_changes = record.detect_connectivity_changes_heuristic()
    # both detectors are expected to report on the same number of keys
    assert len(wbo_changes) == len(rule_changes)
    differences[index] = {
        key: True
        for key, wbo_flag in wbo_changes.items()
        if wbo_flag != rule_changes[key]
    }

In [8]:
differences

{'C[NH2+]C[C@@H](c1ccc(c(c1)O)O)O': {},
 'CO/N=C/1\\C[N@](C[C@H]1C[NH3+])c2c(cc3c(=O)c(cn(c3n2)C4CC4)C(=O)[O-])F': {},
 'c1cc(ccc1[C@H]2C[NH2+]CCc3c2cc(c(c3Cl)O)O)O': {},
 'c1cc(ccc1[C@H]2C[NH2+]CCc3c2cc(c(c3Cl)[O-])O)O': {},
 'c1c(cc(c(c1S(=O)(=O)N)Cl)Cl)S(=O)(=O)N': {},
 'C[NH+](C)CCC=C1c2ccccc2C=Cc3c1cccc3': {},
 'C[N@@H+]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {},
 'C[N@H+]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {},
 'C[N@]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {},
 'c1cc(cnc1)C(=O)N': {},
 'COc1ccccc1OC[C@H](CO)O': {},
 'C[C@](Cc1ccc(cc1)O)(C(=O)[O-])[NH3+]': {},
 'c1ccc(cc1)c2c(oc(n2)CCC(=O)[O-])c3ccccc3': {},
 'C(C[C@@](C(F)F)(C(=O)[O-])N)C[NH3+]': {},
 'C(C[C@@](C(F)F)(C(=O)[O-])[NH3+])C[NH3+]': {},
 'CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]3(N1C(=O)[C@](O3)(C(C)C)NC(=O)[C@H]4C[N@]([C@@H]5Cc6c7c(cccc7[nH]c6Br)C5=C4)C)O': {},
 'CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]3(N1C(=O)[C@](O3)(C(C)C)NC(=O)[C@H]4C[N@H+]([C@@H]5Cc6c7c(cccc7[nH]c6Br)C5=C4)C)O':

In [9]:
# Run the WBO hydrogen-bond detection twice per record (called with 0.05 and
# with 0.1) and collect, per molecule, the keys where the two results differ.
differences = {}
for index, record in result.collection.items():
    wbo_05 = record.detect_hydrogen_bonds_wbo(0.05)
    wbo_1 = record.detect_hydrogen_bonds_wbo(0.1)
    differences[index] = {
        key: True
        for key, found_05 in wbo_05.items()
        if found_05 != wbo_1[key]
    }
differences

{'C[NH2+]C[C@@H](c1ccc(c(c1)O)O)O': {},
 'CO/N=C/1\\C[N@](C[C@H]1C[NH3+])c2c(cc3c(=O)c(cn(c3n2)C4CC4)C(=O)[O-])F': {0: True},
 'c1cc(ccc1[C@H]2C[NH2+]CCc3c2cc(c(c3Cl)O)O)O': {},
 'c1cc(ccc1[C@H]2C[NH2+]CCc3c2cc(c(c3Cl)[O-])O)O': {0: True, 1: True},
 'c1c(cc(c(c1S(=O)(=O)N)Cl)Cl)S(=O)(=O)N': {},
 'C[NH+](C)CCC=C1c2ccccc2C=Cc3c1cccc3': {},
 'C[N@@H+]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {},
 'C[N@H+]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {},
 'C[N@]1CC[C@]23c4c5ccc(c4O[C@H]2C(=O)CC[C@]3([C@H]1C5)O)OC': {0: True},
 'c1cc(cnc1)C(=O)N': {},
 'COc1ccccc1OC[C@H](CO)O': {},
 'C[C@](Cc1ccc(cc1)O)(C(=O)[O-])[NH3+]': {0: True, 1: True, 2: True},
 'c1ccc(cc1)c2c(oc(n2)CCC(=O)[O-])c3ccccc3': {},
 'C(C[C@@](C(F)F)(C(=O)[O-])N)C[NH3+]': {0: True},
 'C(C[C@@](C(F)F)(C(=O)[O-])[NH3+])C[NH3+]': {1: True, 3: True},
 'CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]3(N1C(=O)[C@](O3)(C(C)C)NC(=O)[C@H]4C[N@]([C@@H]5Cc6c7c(cccc7[nH]c6Br)C5=C4)C)O': {},
 'CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]3(N1C

In [10]:
# pull out a molecule that has changed connectivity by WBO of 0.74; this turns out to be the cmiles issue molecule
record = result.collection['c1cc(ccc1[C@H]2C[NH2+]CCc3c2cc(c(c3Cl)[O-])O)O']

In [11]:
# pair every entry's final energy with its position and order the pairs by energy,
# so the first pair is the lowest-energy entry
opt_results = sorted(
    ((opt_rec.final_energy, i) for i, opt_rec in enumerate(record.entries)),
    key=lambda pair: pair[0],
)
opt_results[0]

(-1360.03851847593, 0)

In [12]:
record.entries[0].final_molecule.molecule

NGLWidget()

In [13]:
record.entries[0].find_hydrogen_bonds_wbo(0.05)

[(17, 36)]

In [114]:
# Write the final molecule of entry 0 to a PDB file, load it back with MDTraj and
# run MDTraj's Baker-Hubbard hydrogen-bond detection on it.
# NOTE(review): presumably a cross-check of the WBO-based hydrogen-bond search
# on the same entry - confirm.
import mdtraj as md
record.entries[0].get_final_molecule().to_file('hbonds.pdb', 'pdb')
t = md.load_pdb('hbonds.pdb')
hbonds = md.baker_hubbard(t, periodic=False)

# display the detected hydrogen bonds
hbonds

array([[19, 36, 17]])

In [184]:
import numpy as np
# reshape the stored WBO values of entry 16 into a matrix with one row per atom
wbo = np.array(record.entries[16].final_molecule.wbo).reshape((record.molecule.n_atoms, -1))
# inspect the WBO value between atom indices 19 and 44
wbo[19,44]

0.8410867544201786

In [26]:
record.entries[0].final_molecule.molecule.connectivity

[(0, 2, 1.0),
 (0, 5, 2.0),
 (0, 28, 1.0),
 (1, 3, 2.0),
 (1, 20, 1.0),
 (1, 29, 1.0),
 (2, 6, 2.0),
 (2, 4, 1.0),
 (3, 4, 1.0),
 (3, 9, 1.0),
 (4, 24, 2.0),
 (5, 7, 1.0),
 (5, 27, 1.0),
 (6, 18, 1.0),
 (6, 20, 1.0),
 (7, 18, 2.0),
 (7, 21, 1.0),
 (8, 10, 1.0),
 (8, 14, 1.0),
 (8, 19, 2.0),
 (9, 23, 1.0),
 (9, 25, 2.0),
 (10, 21, 1.0),
 (10, 30, 1.0),
 (10, 31, 1.0),
 (11, 12, 1.0),
 (11, 15, 1.0),
 (11, 32, 1.0),
 (11, 33, 1.0),
 (12, 15, 1.0),
 (12, 34, 1.0),
 (12, 35, 1.0),
 (13, 14, 1.0),
 (13, 21, 1.0),
 (13, 36, 1.0),
 (13, 37, 1.0),
 (14, 17, 1.0),
 (14, 38, 1.0),
 (15, 20, 1.0),
 (15, 39, 1.0),
 (16, 26, 1.0),
 (16, 40, 1.0),
 (16, 41, 1.0),
 (16, 42, 1.0),
 (17, 22, 1.0),
 (17, 43, 1.0),
 (17, 44, 1.0),
 (19, 26, 1.0),
 (22, 45, 1.0),
 (22, 46, 1.0),
 (22, 47, 1.0)]

In [21]:
record.entries[0].final_molecule.guess_connectivity()

[(0, 2),
 (0, 5),
 (0, 28),
 (1, 3),
 (1, 20),
 (1, 29),
 (2, 4),
 (2, 6),
 (3, 4),
 (3, 9),
 (4, 24),
 (5, 7),
 (5, 27),
 (6, 18),
 (6, 20),
 (7, 18),
 (7, 21),
 (8, 10),
 (8, 14),
 (8, 19),
 (9, 23),
 (9, 25),
 (10, 21),
 (10, 30),
 (10, 31),
 (11, 12),
 (11, 15),
 (11, 32),
 (11, 33),
 (12, 15),
 (12, 34),
 (12, 35),
 (13, 14),
 (13, 21),
 (13, 36),
 (13, 37),
 (14, 17),
 (14, 38),
 (15, 20),
 (15, 39),
 (16, 26),
 (16, 40),
 (16, 41),
 (16, 42),
 (17, 22),
 (17, 43),
 (17, 44),
 (19, 26),
 (21, 47),
 (22, 45),
 (22, 46)]

In [31]:
# now let's pull another record; this should be fine for entry 0 but the rest should have changed
record = result.collection['C(C[C@@](C(F)F)(C(=O)[O-])N)C[NH3+]']

In [32]:
record.entries[2].final_molecule.molecule

NGLWidget()

In [33]:
# grab one more 
record = result.collection['CC(C)(C)c1cc(c(cc1NC(=O)c2cnc3ccccc3c2[O-])O)C(C)(C)C']

In [34]:
# potential hydrogen bonds
record.entries[2].final_molecule.molecule

NGLWidget()

In [35]:
entry= opt_ds.get_record(record.entries[2].index, 'default')

In [36]:
entry.get_initial_molecule()

NGLWidget()

In [41]:
import numpy as np
# fetch the result record for the first step of this optimization's trajectory
opt_result = client.query_results(id=entry.trajectory[0])[0]

# reshape the Wiberg-Lowdin indices stored in that result's qcvars into a matrix
# with one row per atom, then inspect the value between atom indices 26 and 54
ref_wbo = np.array(opt_result.extras['qcvars']['WIBERG_LOWDIN_INDICES']).reshape((record.molecule.n_atoms, -1))
ref_wbo[26, 54]

0.0002859191447249724

In [38]:

wbo = np.array(record.entries[2].final_molecule.wbo).reshape((record.molecule.n_atoms, -1))
print(wbo)

[[0.0000000000000000e+00 1.4563681218812703e+00 1.6259248143488521e+00
  ... 8.7026027579852054e-08 4.0967622571268683e-05
  7.3623287735123145e-08]
 [1.4563681218812703e+00 0.0000000000000000e+00 1.1664155329396540e-01
  ... 2.0892282049585368e-07 3.0700125919038442e-05
  1.1825772874012465e-08]
 [1.6259248143488521e+00 1.1664155329396540e-01 0.0000000000000000e+00
  ... 1.7059812633436341e-07 3.5721499162818106e-04
  3.2446420127369781e-07]
 ...
 [8.7026027579852054e-08 2.0892282049585368e-07 1.7059812633436341e-07
  ... 0.0000000000000000e+00 5.2257957165194442e-07
  2.1919209187808649e-04]
 [4.0967622571268683e-05 3.0700125919038442e-05 3.5721499162818106e-04
  ... 5.2257957165194442e-07 0.0000000000000000e+00
  1.5618759609353301e-05]
 [7.3623287735123145e-08 1.1825772874012465e-08 3.2446420127369781e-07
  ... 2.1919209187808649e-04 1.5618759609353301e-05
  0.0000000000000000e+00]]


In [17]:
wbo[25,54]

0.7167717188302888

In [60]:
# now we need to create a new optimisation dataset
dataset = OptimizationDataset(dataset_name=result.dataset_name, dataset_tagline='MM benchmarks', basis='smirnoff', 
                              method='openff_unconstrained-1.0.0', spec_name='Parsley', spec_description='Parsley benchmark spec', program='openmm')

# make the dataset from the set we saved
# dataset = OptimizationDataset.parse_file('dataset.json')

In [61]:
dataset

OptimizationDataset(dataset_name='OpenFF Full Optimization Benchmark 1', dataset_tagline='MM benchmarks', method='openff_unconstrained-1.0.0', basis='smirnoff', program='openmm', maxiter=200, driver='gradient', scf_properties=['dipole', 'qudrupole', 'wiberg_lowdin_indices'], spec_name='Parsley', spec_description='Parsley benchmark spec', priority='normal', tag='openff', dataset={}, filtered_molecules={}, optimization_program=GeometricProcedure(program='geometric', coordsys='tric', enforce=0.0, epsilon=1e-05, reset=False, qccnv=False, molcnv=False, check=0, trust=0.1, tmax=0.3, maxiter=300, convergence_set='GAU', constraints={}))

In [62]:
# Select the data to add to the new dataset, excluding any bonding changes:
# for every molecule keep the lowest-energy entry that was not flagged as
# having changed connectivity.
valid_molecules = {}

for index, record in result.collection.items():
    # per-entry flags from the connectivity check; only unflagged entries are kept
    conect_check = record.detect_conectivity_changes()
    unchanged = [record.entries[entry] for entry, changed in conect_check.items() if not changed]
    if not unchanged:
        # every entry was flagged, so this molecule is left out
        continue
    # lowest final energy among the entries that kept their connectivity
    valid_molecules[index] = min(unchanged, key=lambda kept: kept.final_energy)

In [63]:
# Fill the new dataset: for every kept QM entry create a QCPortal molecule that
# carries the mapped SMILES in its extras, and store the id of the QM entry in the
# attributes saved alongside it.
for index, entry in valid_molecules.items():
    # copy the cmiles so that adding 'qm_entry' below does not modify the
    # mapping that belongs to the record in the result collection
    attributes = dict(result.collection[index].cmiles)
    QM_ID = str(entry.id)
    # put the QM_ID into the metadata
    attributes['qm_entry'] = QM_ID
    molecule = entry.final_molecule.molecule.dict()
    # put the cmiles into the molecule extras
    molecule['extras'] = {'cmiles': {'canonical_isomeric_explicit_hydrogen_mapped_smiles': entry.cmiles}}
    mol = ptl.models.Molecule.from_data(molecule)
    dataset.dataset[entry.index] = {'initial_molecules': [mol], 'attributes': attributes}
    
    

In [64]:
# now export this fraction of the dataset
dataset.export_dataset('dataset5000_6000.json')

In [4]:
# now start a local client and submit the dataset
# from qcfractal import FractalSnowflakeHandler
# # set up the server and handler
# server = FractalSnowflakeHandler()
# server

# # now we need to remake the client using portal to trick the dataset
# new_client = ptl.FractalClient(server)

In [65]:
dataset.n_molecules

138

In [5]:
# new_client

In [6]:
# now try and submit the data to run
# response = dataset.submit(client=new_client)

In [29]:
# check the dataset is running
# mm_opt = new_client.get_collection('OptimizationDataset', 'OpenFF Full Optimization Benchmark 1')

In [31]:
# mm_opt.status('Parsley')

Unnamed: 0,Parsley
COMPLETE,13


In [10]:
# record = mm_opt.get_record('c[nh2+]c[c@@h](c1ccc(c(c1)o)o)o-3', 'Parsley')

In [16]:
# record.get_initial_molecule()

array([[ 5.92668909, -2.5174955 ,  4.17696576],
       [ 3.75058704, -1.50758183,  5.29442821],
       [ 3.77903502, -2.65371325,  0.13459745],
       [ 5.9557968 , -3.08951008,  1.59881089],
       [ 1.60195459, -1.06411744,  3.8405537 ],
       [ 1.60648614, -1.64850165,  1.23868383],
       [10.04599126,  1.61829812, -3.88259348],
       [10.03052882, -1.85176596, -0.5556078 ],
       [ 8.37031991, -4.03821526,  0.37318519],
       [ 8.64956895, -0.53044157, -2.66994834],
       [-0.62890221, -0.09344984,  4.72465518],
       [-0.47381789, -1.25713193, -0.20917425],
       [ 7.95638675, -5.49611623, -1.86499496],
       [ 7.57959947, -2.87382756,  5.3437351 ],
       [ 3.73286395, -1.08329344,  7.30566302],
       [ 3.68084255, -3.19854062, -1.84364644],
       [11.80628998,  0.8988585 , -4.67287735],
       [ 8.87043607,  2.41739851, -5.37396895],
       [10.43983758,  3.04144555, -2.44704628],
       [11.8080265 , -2.54845371, -1.33791889],
       [10.39327026, -0.47537755,  0.933

In [29]:
record.get_initial_molecule().geometry

array([[ 5.92668909, -2.5174955 ,  4.17696576],
       [ 3.75058704, -1.50758183,  5.29442821],
       [ 3.77903502, -2.65371325,  0.13459745],
       [ 5.9557968 , -3.08951008,  1.59881089],
       [ 1.60195459, -1.06411744,  3.8405537 ],
       [ 1.60648614, -1.64850165,  1.23868383],
       [10.04599126,  1.61829812, -3.88259348],
       [10.03052882, -1.85176596, -0.5556078 ],
       [ 8.37031991, -4.03821526,  0.37318519],
       [ 8.64956895, -0.53044157, -2.66994834],
       [-0.62890221, -0.09344984,  4.72465518],
       [-0.47381789, -1.25713193, -0.20917425],
       [ 7.95638675, -5.49611623, -1.86499496],
       [ 7.57959947, -2.87382756,  5.3437351 ],
       [ 3.73286395, -1.08329344,  7.30566302],
       [ 3.68084255, -3.19854062, -1.84364644],
       [11.80628998,  0.8988585 , -4.67287735],
       [ 8.87043607,  2.41739851, -5.37396895],
       [10.43983758,  3.04144555, -2.44704628],
       [11.8080265 , -2.54845371, -1.33791889],
       [10.39327026, -0.47537755,  0.933

In [22]:
# make sure we can find the entry in the database still
opt = client.query_procedures(id=[6091739])[0]

In [32]:
opt.get_final_molecule().geometry

array([[ 5.9266890872178815, -2.517495502742451 ,  4.176965762344392 ],
       [ 3.7505870434024358, -1.5075818337257223,  5.294428206063497 ],
       [ 3.7790350173584635, -2.6537132453703056,  0.1345974452144877],
       [ 5.955796803056457 , -3.089510078906738 ,  1.5988108865682291],
       [ 1.6019545920163263, -1.0641174371107887,  3.8405537042185003],
       [ 1.6064861402896544, -1.6485016452039225,  1.2386838306736936],
       [10.045991261267842 ,  1.6182981215224963, -3.882593484093834 ],
       [10.030528820227318 , -1.851765962354614 , -0.555607800301766 ],
       [ 8.370319910053892 , -4.038215255087373 ,  0.3731851920939178],
       [ 8.649568948750694 , -0.5304415745624468, -2.669948337299501 ],
       [-0.6289022140306975, -0.0934498389828873,  4.724655184978758 ],
       [-0.4738178868279123, -1.2571319337384739, -0.2091742469174727],
       [ 7.956386753111268 , -5.496116233930341 , -1.8649949551438165],
       [ 7.579599473252573 , -2.8738275559474684,  5.34373510359

In [82]:
# grab a single entry from the optimization dataset
entry = opt_ds.get_entry('C[NH2+]C[C@@H](c1ccc(c(c1)O)O)O-0')

In [83]:
entry

OptEntry(name='C[NH2+]C[C@@H](c1ccc(c(c1)O)O)O-0', initial_molecule='3757419', additional_keywords={}, attributes={'canonical_explicit_hydrogen_smiles': '[H]c1c(c(c(c(c1C([H])(C([H])([H])[N+]([H])([H])C([H])([H])[H])O[H])[H])O[H])O[H])[H]', 'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:14][c:1]1[c:2]([c:5]([c:6]([c:3]([c:4]1[C@:9]([H:22])([C:8]([H:20])([H:21])[N+:10]([H:23])([H:24])[C:7]([H:17])([H:18])[H:19])[O:13][H:27])[H:16])[O:12][H:26])[O:11][H:25])[H:15]', 'canonical_isomeric_explicit_hydrogen_smiles': '[H]c1c(c(c(c(c1[C@]([H])(C([H])([H])[N+]([H])([H])C([H])([H])[H])O[H])[H])O[H])O[H])[H]', 'canonical_isomeric_smiles': 'C[NH2+]C[C@@H](c1ccc(c(c1)O)O)O', 'canonical_smiles': 'C[NH2+]CC(c1ccc(c(c1)O)O)O', 'inchi_key': 'UCTWMZQNUQWSLP-VIFPVBQESA-O', 'molecular_formula': 'C9H14NO3', 'provenance': 'cmiles_0+unknown_openeye_2019.Apr.2', 'standard_inchi': 'InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/p+1/t9-/m0/s1', 'unique_protomer_representation':

In [12]:
opt_ds.df.to_dict()['default']['C1CC1NC(=O)C[N@@]2C[C@H](CO2)O-0'].dict()

{'id': '18433463',
 'hash_index': '4ffc5a29d0c8496cfc5cb0834d80981b80905c3a',
 'procedure': 'optimization',
 'program': 'geometric',
 'version': 1,
 'protocols': {},
 'extras': {},
 'stdout': '21575257',
 'stderr': None,
 'error': None,
 'task_id': None,
 'manager_name': 'MolSSI_ARC_Cascades_Parsl-calogin1-acbbd830-1dea-4078-943a-a8d49c872b7f',
 'status': <RecordStatusEnum.complete: 'COMPLETE'>,
 'modified_on': datetime.datetime(2020, 3, 24, 1, 39, 55, 488995),
 'created_on': datetime.datetime(2020, 3, 24, 0, 26, 1, 811861),
 'provenance': {'creator': 'geomeTRIC',
  'version': '0.9.7.2',
  'routine': 'geometric.run_json.geometric_run_json',
  'username': 'mwelborn',
  'wall_time': 1182.4561066627502,
  'qcengine_version': '0.14.0',
  'cpu': 'Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz',
  'hostname': 'ca066'},
 'schema_version': 1,
 'initial_molecule': '12115251',
 'qc_spec': {'driver': <DriverEnum.gradient: 'gradient'>,
  'method': 'b3lyp-d3bj',
  'basis': 'dzvp',
  'keywords': '2',
  '

In [None]:
# Map every dataset index to the procedure job recorded for the 'default' spec.
# This acts like the records data: keyed by the record index, valued by the
# 'default' entry of that record's object map.
molecule_data = {
    index: opt_entry.object_map['default']
    for index, opt_entry in opt_ds.data.records.items()
}


In [None]:
# now we need to pull the data of each record in chunks
# we need the final molecule structure and the WBO then we should compare

In [None]:
# fill out the links 
df = opt_ds.query('default')
    

In [None]:
opt_ds.to_json()

In [None]:
new = ptl.collections.OptimizationDataset.from_json(opt_ds.to_json())

In [None]:
new = ptl.collections.OptimizationDataset.from_server(client, 'OpenFF Full Optimization Benchmark 1')

In [None]:
new.list_specifications()

In [None]:
df.loc[df.index[0]].dict()

In [None]:
df.loc[df.index[0]].get_final_molecule().dict()

In [None]:
opt_ds.data.records

In [None]:
opt_ds.data.specs['default'].qc_spec

In [None]:
client.query_keywords(2)[0].values['scf_properties']

In [38]:
dataset.export_dataset('dataset.json')

In [39]:
copy_dataset = OptimizationDataset.parse_file('dataset.json')

In [41]:
copy_dataset.dataset['c[nh2+]c[c@@h](c1ccc(c(c1)o)o)o-3']

{'initial_molecules': [{'schema_name': 'qcschema_molecule',
   'schema_version': 2,
   'validated': True,
   'symbols': ['C',
    'C',
    'C',
    'C',
    'C',
    'C',
    'C',
    'C',
    'C',
    'N',
    'O',
    'O',
    'O',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H',
    'H'],
   'geometry': [5.9266890872178815,
    -2.517495502742451,
    4.176965762344392,
    3.7505870434024358,
    -1.5075818337257223,
    5.294428206063497,
    3.7790350173584635,
    -2.6537132453703056,
    0.13459744521448766,
    5.955796803056457,
    -3.089510078906738,
    1.5988108865682291,
    1.6019545920163263,
    -1.0641174371107887,
    3.8405537042185003,
    1.6064861402896544,
    -1.6485016452039225,
    1.2386838306736936,
    10.045991261267842,
    1.6182981215224963,
    -3.882593484093834,
    10.030528820227318,
    -1.851765962354614,
    -0.555607800301766,
    8.370319910053892,
    -4.038215255087373,
  