In [1]:
from qcportal import PortalClient
from openff.qcsubmit.results import BasicResultCollection
# Connect to the QCArchive server at the MolSSI address; no credentials are supplied here.
client = PortalClient(address="https://api.qcarchive.molssi.org:443/")

# Look at results of "successful" calculations

In [2]:
# Results stored under the 'pbe0/def2-TZVPPD/ddx-water' specification of the benchmark dataset.
ddx_dataset = BasicResultCollection.from_server(
    client=client,
    datasets=['OpenFF NAGL2 ESP Timing Benchmark v1.1'],
    spec_name='pbe0/def2-TZVPPD/ddx-water',
)

In [3]:
# Results stored under the 'pbe0/def2-TZVPPD' specification of the same benchmark dataset.
vac_dataset = BasicResultCollection.from_server(
    client=client,
    datasets=['OpenFF NAGL2 ESP Timing Benchmark v1.1'],
    spec_name='pbe0/def2-TZVPPD',
)

In [4]:
# How many results were retrieved for the 'ddx-water' specification.
ddx_dataset.n_results

664

In [5]:
# How many results were retrieved for the 'pbe0/def2-TZVPPD' specification.
vac_dataset.n_results

1009

In [6]:
# Resolve the collection into a sequence of entries; later cells read each
# entry as entry[0] (the record) and entry[1] (the molecule).
ddx_dataset_records = ddx_dataset.to_records()

In [7]:
# Resolve the collection into a sequence of entries; later cells read each
# entry as entry[0] (the record) and entry[1] (the molecule).
vac_dataset_records = vac_dataset.to_records()

In [8]:
# Molecule IDs of every DDX record, in the order the records were returned.
ddx_mol_ids = [
    ddx_entry[0].molecule_id
    for ddx_entry in ddx_dataset_records
]

In [9]:
# Molecule IDs of the vacuum records that also have a DDX result.
# `ddx_mol_ids` is a list, so testing membership against it inside the
# comprehension rescans the list for every vacuum record; a set built once
# makes each lookup constant time (assumes molecule IDs are hashable).
ddx_mol_id_set = set(ddx_mol_ids)
vac_mol_ids = [
    record[0].molecule_id
    for record in vac_dataset_records
    if record[0].molecule_id in ddx_mol_id_set
]

In [10]:
# Vacuum entries whose molecule also has a DDX result, in vacuum-collection order.
# `ddx_mol_ids` is a list, so testing membership against it inside the
# comprehension rescans the list for every vacuum record; a set built once
# makes each lookup constant time (assumes molecule IDs are hashable).
ddx_mol_id_set = set(ddx_mol_ids)
vac_records_w_ddx = [
    record
    for record in vac_dataset_records
    if record[0].molecule_id in ddx_mol_id_set
]

## Analyze molecules that were successful for both DDX and vacuum, to see if there are large differences between their charges.

For more information, see this issue: https://github.com/psi4/psi4/issues/3135

In [11]:
import numpy as np

# A molecule is flagged when any element of |vacuum - DDX| exceeds the cutoff.
CHARGE_DIFF_CUTOFF = 1  # applied to the MBIS, Lowdin and Mulliken charges
DIPOLE_DIFF_CUTOFF = 2  # applied to the 'pbe0 dipole' property

# Pair each DDX record with its vacuum counterpart by molecule ID instead of by
# list position: the two lists are built independently, so relying on them
# sharing an order would silently compare different molecules if it ever broke.
vac_record_by_mol_id = {entry[0].molecule_id: entry[0] for entry in vac_records_w_ddx}


def _report_large_diff(molecule, ddx_values, vac_values, cutoff):
    """Print a report and return True if any |vac - ddx| element exceeds `cutoff`.

    The report is the molecule's SMILES, the DDX values, the vacuum values and
    their element-wise absolute difference, in that order. Nothing is printed
    and False is returned when no element exceeds the cutoff.
    """
    abs_diff = np.abs(vac_values - ddx_values)
    if not np.any(abs_diff > cutoff):
        return False
    print(molecule.to_smiles())
    print(ddx_values)
    print(vac_values)
    print(abs_diff)
    return True


# Counters read by the summary cell that follows.
n_mbis_diff = 0
n_mbis_diff_i = 0  # subset of n_mbis_diff whose SMILES contains "I"
n_lowdin_diff = 0
n_mulliken_diff = 0
n_dipole_diff = 0

for ddx_entry in ddx_dataset_records:
    ddx_record, molecule = ddx_entry[0], ddx_entry[1]
    # Raises KeyError if a DDX molecule has no vacuum counterpart.
    vac_record = vac_record_by_mol_id[ddx_record.molecule_id]

    ddx_props = ddx_record.properties
    vac_props = vac_record.properties

    ddx_mbis = np.array(ddx_props['mbis charges'])
    vac_mbis = np.array(vac_props['mbis charges'])

    ddx_lowdin = np.array(ddx_props['lowdin charges'])
    vac_lowdin = np.array(vac_props['lowdin charges'])

    ddx_mulliken = np.array(ddx_props['mulliken charges'])
    vac_mulliken = np.array(vac_props['mulliken charges'])

    ddx_dipole = np.array(ddx_props['pbe0 dipole'])
    vac_dipole = np.array(vac_props['pbe0 dipole'])

    # The categories are deliberately exclusive: a molecule is counted only
    # under the first property (MBIS, Lowdin, Mulliken, dipole) that trips.
    if _report_large_diff(molecule, ddx_mbis, vac_mbis, CHARGE_DIFF_CUTOFF):
        n_mbis_diff += 1
        if "I" in molecule.to_smiles():
            n_mbis_diff_i += 1
    elif _report_large_diff(molecule, ddx_lowdin, vac_lowdin, CHARGE_DIFF_CUTOFF):
        n_lowdin_diff += 1
    elif _report_large_diff(molecule, ddx_mulliken, vac_mulliken, CHARGE_DIFF_CUTOFF):
        n_mulliken_diff += 1
    elif _report_large_diff(molecule, ddx_dipole, vac_dipole, DIPOLE_DIFF_CUTOFF):
        n_dipole_diff += 1

[H]c1c(c(c(c(c1[H])I)[H])C#N)[H]
[  1.           3.60031156   2.85232584   2.38561488   3.12157664
   3.88654878   2.8239613  -10.90453539   2.43361449 -16.35612666
   0.31259664   4.17740115   0.6666496 ]
[ 0.15602108 -0.13456854 -0.12563828 -0.02197516 -0.10464107 -0.08764206
 -0.09317167  0.16498806 -0.00313427  0.17829563  0.32812837 -0.42071874
  0.16405501]
[8.43978917e-01 3.73488010e+00 2.97796412e+00 2.40759004e+00
 3.22621771e+00 3.97419083e+00 2.91713297e+00 1.10695234e+01
 2.43674876e+00 1.65344223e+01 1.55317273e-02 4.59811989e+00
 5.02594590e-01]
[H]c1c(c(c(nc1[H])Br)I)[H]
[  1.           3.91306394   3.9425604    2.57785436   2.49849821
   2.99060192   1.10186381   0.8093061   -1.52489838  -3.49412554
 -13.81361521]
[ 0.170136   -0.2498774  -0.02578518 -0.19979317  0.32610849 -0.37770589
  0.14849117  0.11751963 -0.10831896  0.03685203  0.16244543]
[ 0.829864    4.16294134  3.96834557  2.77764753  2.17238972  3.36830781
  0.95337263  0.69178647  1.41657941  3.53097757 13.

In [12]:
# One line per category; the MBIS line also shows how many flagged SMILES contain "I".
diff_summary = (
    ('MBIS differences:     ', (n_mbis_diff, n_mbis_diff_i)),
    ('Lowdin differences:   ', (n_lowdin_diff,)),
    ('Mulliken differences: ', (n_mulliken_diff,)),
    ('Dipole differences:   ', (n_dipole_diff,)),
)
for label, counts in diff_summary:
    print(label, *counts)

MBIS differences:      51 13
Lowdin differences:    0
Mulliken differences:  0
Dipole differences:    0


Tried cutoffs for the "problematic" charge difference of 0.15, 0.25, and 0.5. Results with `0.15 < charge difference < 0.5` seemed fine. Using `0.5 < charge difference` only yielded differences larger than ~6. I decided to use 1 just for flexibility, but anything between 0.5 and 5 should give the same result.

Note that these are mutually exclusive categories, checked in the order MBIS → Lowdin → Mulliken → dipole. A count of 0 for Lowdin therefore means that no molecule had a large Lowdin charge difference _without_ also having a large MBIS difference. Typically all the molecules with a large MBIS charge difference also had large Lowdin and Mulliken charge differences.

Luckily, not too many of the successful calculations are secretly buggy.

# Look at Errored molecules
We need to do this directly through QCArchive (QCA), because the collections above only downloaded the complete records.

In [13]:
# NOTE(review): the returned query iterator is only displayed, never assigned or
# consumed in the visible notebook — this looks like leftover exploration.
client.query_singlepoints()

<qcportal.record_models.RecordQueryIterator at 0x19674bc10>

In [14]:
# Fetch the dataset object itself so that errored records can be inspected too.
dataset_qcp = client.get_dataset(
    dataset_type='SinglePoint',
    dataset_name='OpenFF NAGL2 ESP Timing Benchmark v1.1',
)

In [15]:
# Record counts per specification, broken down by record status.
dataset_qcp.status()

{'pbe0/def2-TZVPPD': {<RecordStatusEnum.complete: 'complete'>: 1009},
 'pbe0/def2-TZVPPD/ddx-water': {<RecordStatusEnum.complete: 'complete'>: 664,
  <RecordStatusEnum.error: 'error'>: 345}}

## Sort molecules by error type

Three categories:
* ADIIS convergence failure: Accelerated DIIS portion of the SCF failed, never even gets to regular DIIS.
* SCF convergence failure: Overall SCF convergence fails, but ADIIS was successful.
* MBIS convergence failure: Overall SCF converges, but MBIS algorithm fails to converge. Typically indicates convergence to a "weird" wavefunction.



In [16]:
# Sort every errored record into one bucket according to the first known
# failure signature found in its error message. Each bucket holds
# (entry name, specification name) tuples.
mbis_err = []
scf_conv_err = []
adiis_err = []
i_errs = []  # errored entries whose name contains "I", regardless of error type
for e_name, s_name, record in dataset_qcp.iterate_records(status='error'):
    # Read the message once from the record the iterator already yields,
    # instead of looking the same record up again with get_record() per test.
    error_message = record.error['error_message']
    entry = (e_name, s_name)

    # NOTE(review): entry names appear to be SMILES strings, so this is a
    # substring test for iodine — confirm no other "I..." symbols can occur.
    if "I" in e_name:
        i_errs.append(entry)

    if 'could not converge MBIS' in error_message:
        mbis_err.append(entry)
    elif 'Could not converge SCF iterations in 200 iterations' in error_message:
        scf_conv_err.append(entry)
    elif 'ADIIS minimization failed' in error_message:
        adiis_err.append(entry)
    else:
        # Unrecognized failure: print it so it can be inspected by hand.
        print(e_name, s_name)

In [17]:
# Size of each error bucket, plus the number of errored entries whose name contains "I".
error_buckets = (
    ('MBIS convergence error:  ', mbis_err),
    ('SCF convergence error:   ', scf_conv_err),
    ('ADIIS convergence error: ', adiis_err),
    ("total errors with I:     ", i_errs),
)
for label, bucket in error_buckets:
    print(label, len(bucket))

MBIS convergence error:   209
SCF convergence error:    113
ADIIS convergence error:  23
total errors with I:      22


In total there are 53 I-containing molecules: 22 crashed, and 13 converged but gave large discrepancies between PCM and vacuum.

## Examining error files of different error types to debug/re-run locally

All tested molecules without I converged using PBE0 and expanding the extent of the radial grid using `dft_bs_radius_alpha 5.0`

In [22]:
# Each bucket item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*scf_conv_err[2]).error['error_message'])

QCEngine Unknown Error: 
    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,

In [23]:
# Each bucket item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*mbis_err[1]).error['error_message'])

QCEngine Unknown Error: 
    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,

In [19]:
# SCF-convergence failures restricted to entries whose name contains "I".
scf_conv_err_i = list(filter(lambda entry: 'I' in entry[0], scf_conv_err))

In [20]:
# Show the (entry name, specification name) pairs that were kept.
scf_conv_err_i

[('C1=C(SC(=N1)N)I', 'pbe0/def2-TZVPPD/ddx-water'),
 ('Cc1cc(c(c(c1)I)N)C', 'pbe0/def2-TZVPPD/ddx-water'),
 ('CSc1cc(cnc1)I', 'pbe0/def2-TZVPPD/ddx-water')]

In [50]:
# Each item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*scf_conv_err_i[2]).error['error_message'])

QCEngine Unknown Error: 
    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,

## Examining error messages of iodine-containing crashes

After local testing, we found that we were able to converge 7/10 of these I-containing test calculations by using `wb97x-d` instead of `pbe0` and by expanding the extent of the radial grid using `dft_bs_radius_alpha 5.0`. Of the 3 that still did not converge, 2 converged when using the `huckel` guess. So we will proceed with this methodology.

In [23]:
# MBIS-convergence failures restricted to entries whose name contains "I".
mbis_err_i = list(filter(lambda entry: 'I' in entry[0], mbis_err))

In [24]:
# Number of MBIS-convergence failures whose entry name contains "I".
len(mbis_err_i)

18

In [44]:
# Each item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*mbis_err_i[1]).molecule)

Molecule(name='C7H5IN2', formula='C7H5IN2', hash='32342aa')


In [49]:
# Each item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*mbis_err_i[5]).error['error_message'])

QCEngine Unknown Error: 
    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,

In [51]:
# Specification name stored alongside this failed entry.
mbis_err_i[5][1]

'pbe0/def2-TZVPPD/ddx-water'

In [54]:
# Standard output of the 'pbe0/def2-TZVPPD' record for an entry taken from the
# iodine MBIS-failure list.
print(
    dataset_qcp.get_record(
        mbis_err_i[3][0],
        'pbe0/def2-TZVPPD',
    ).stdout
)


    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,
    J. F. Gonthier, R. 

In [21]:
# ADIIS failures restricted to entries whose name contains "I".
adiis_err_i = list(filter(lambda entry: 'I' in entry[0], adiis_err))

In [22]:
# Show the (entry name, specification name) pairs that were kept.
adiis_err_i

[('C1COC(C1I)CO', 'pbe0/def2-TZVPPD/ddx-water')]

In [34]:
# Each item is an (entry name, specification name) pair, so it can be
# unpacked straight into get_record().
print(dataset_qcp.get_record(*adiis_err_i[0]).error['error_message'])

QCEngine Unknown Error: 
    -----------------------------------------------------------------------
          Psi4: An Open-Source Ab Initio Electronic Structure Package
                               Psi4 1.9.1 release

                         Git: Rev {} zzzzzzz 


    D. G. A. Smith, L. A. Burns, A. C. Simmonett, R. M. Parrish,
    M. C. Schieber, R. Galvelis, P. Kraus, H. Kruse, R. Di Remigio,
    A. Alenaizan, A. M. James, S. Lehtola, J. P. Misiewicz, M. Scheurer,
    R. A. Shaw, J. B. Schriber, Y. Xie, Z. L. Glick, D. A. Sirianni,
    J. S. O'Brien, J. M. Waldrop, A. Kumar, E. G. Hohenstein,
    B. P. Pritchard, B. R. Brooks, H. F. Schaefer III, A. Yu. Sokolov,
    K. Patkowski, A. E. DePrince III, U. Bozkaya, R. A. King,
    F. A. Evangelista, J. M. Turney, T. D. Crawford, C. D. Sherrill,
    J. Chem. Phys. 152(18) 184108 (2020). https://doi.org/10.1063/5.0006002

                            Additional Code Authors
    E. T. Seidl, C. L. Janssen, E. F. Valeev, M. L. Leininger,