# Compute Energies of Random Offsets
Form a training set for approximate Hessians by computing energies at many random displacements of the relaxed structure.

In [1]:
from ase.calculators.psi4 import Psi4
from ase.io import write, read
from ase.db import connect
from ase import Atoms
from pathlib import Path
from tqdm import tqdm 
import numpy as np
import os

Configuration

In [2]:
# Settings that identify the run and control the electronic-structure calculation
molecule_name = 'caffeine'
method = 'hf'
basis = 'def2-svpd'
# os.cpu_count() returns None when the core count cannot be determined;
# fall back to a single thread instead of failing inside min()
threads = min(os.cpu_count() or 1, 12)
step_size: float = 0.005 # Perturbation amount: L2 norm of each full random displacement vector

Derived

In [3]:
# Label shared by the input and output files of this run
run_name = '_'.join((molecule_name, method, basis))

## Load in the Relaxed Structure
We generated a relaxed structure in the previous notebook.

In [4]:
# Read the structure stored under data/exact for this run, then display it
atoms = read(Path('data', 'exact', f'{run_name}.xyz'))
atoms

Atoms(symbols='O2N4C8H10', pbc=False, forces=..., calculator=SinglePointCalculator(...))

## Compute many random energies
Compute $3N(3N+1)/2 + 1$ energies with displacement directions sampled [on the unit sphere](https://mathoverflow.net/questions/24688/efficiently-sampling-points-uniformly-from-the-surface-of-an-n-sphere) and scaled to a norm of `step_size`. This is enough to fit the Hessian exactly, plus a little more.

Prepare the output directory

In [5]:
# Directory that receives the databases of displaced structures
out_dir = Path('data', 'approx')
out_dir.mkdir(exist_ok=True)  # no error if it is already there

In [6]:
# One database per run name and step size
db_path = out_dir.joinpath(f'{run_name}-random-d={step_size:.2e}.db')

Add the relaxed geometry if needed

In [7]:
# Only write the relaxed geometry when the database file does not exist yet,
# so an existing database is left as it is
db_missing = not db_path.is_file()
if db_missing:
    with connect(db_path) as new_db:
        new_db.write(atoms)

Make the calculator

In [8]:
# Psi4 calculator configured from the settings above
calc = Psi4(
    method=method,
    basis=basis,
    num_threads=threads,
    memory='4096MB',
)


  Memory set to   3.815 GiB by Python driver.
  Threads set to 12 by Python driver.


Generate the energies

In [9]:
n_atoms = len(atoms)
n_coords = 3 * n_atoms  # three Cartesian components per atom
# n_coords * (n_coords + 1) / 2 energies, plus one extra
to_compute = n_coords * (n_coords + 1) // 2 + 1
print(f'Need to run {to_compute} calculations for full accuracy.')

Need to run 2629 calculations for full accuracy.


In [10]:
# Every entry already stored in the database counts as finished work
with connect(db_path) as db:
    done = len(db)
remaining = to_compute - done
print(f'Already done {done}. {remaining} left to do.')

Already done 1. 2628 left to do.


In [None]:
# Re-count the stored entries here instead of trusting a count taken in an earlier
# cell: if this long-running cell is interrupted and re-run, a stale count would
# schedule more calculations than are actually missing.
with connect(db_path) as db:
    n_stored = len(db)
n_remaining = max(to_compute - n_stored, 0)

# `initial=` shows the stored entries without counting them towards the rate/ETA,
# and the context manager closes the bar even if a calculation raises.
with tqdm(total=to_compute, initial=n_stored) as pbar:
    for _ in range(n_remaining):
        # Sample a perturbation: Gaussian draws rescaled to a fixed L2 norm
        disp = np.random.normal(0, 1, size=(n_atoms * 3))
        # The mean is taken over all 3N components at once, not per Cartesian axis
        # NOTE(review): this does not remove the net translation along x, y and z
        # separately; confirm that is intended
        disp -= disp.mean()
        disp /= np.linalg.norm(disp)
        disp *= step_size  # the full displacement vector now has norm `step_size`
        disp = disp.reshape((-1, 3))

        # Make the new atoms
        new_atoms = atoms.copy()
        new_atoms.positions += disp

        # Compute the energy and store in the db
        new_atoms.calc = calc
        new_atoms.get_potential_energy()
        # Written once per calculation, so finished results are already stored
        # if the loop stops early
        with connect(db_path) as db:
            db.write(new_atoms)

        pbar.update(1)

  0%|                                                                                                                                                                                                                                                            | 0/2629 [00:00<?, ?it/s]


  Memory set to   3.815 GiB by Python driver.
  Threads set to 12 by Python driver.


 10%|███████████████████████                                                                                                                                                                                                                         | 252/2629 [28:25<4:10:48,  6.33s/it]