In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all modules
# before each cell execution so that edits to imported .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
import torch

# Create all new floating point tensors in double precision by default.
torch.set_default_dtype(torch.float64)

# Prefer the GPU with this index. When CUDA is available but fewer GPUs are
# visible than this index requires, fall back to the first GPU instead of
# naming a device that does not exist; without CUDA, use the CPU.
PREFERRED_GPU = 2
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU if PREFERRED_GPU < torch.cuda.device_count() else 0
    device = torch.device("cuda", gpu_index)
else:
    device = torch.device("cpu")

In [7]:
import subprocess
from glob import glob
from os import path

import matplotlib.pylab as plt
import numpy as np
from se3cnn.point_utils import difference_matrix

from cp2k_file_utils import read_energy
from pao_file_utils import parse_pao_file, write_pao_file
from pao_tfn_dataset import encode_kind
from pao_tfn_trainer import train_pao_tfn, loss_function

In [8]:
# Hard-coded metadata followed by the list of training data files.

# Size of the PAO basis.
pao_basis_size = 4

# Number of primary basis shells per kind.
prim_basis_shells = dict(
    H=[2, 1, 0],  # two s-shells, one p-shell, no d-shells
    O=[2, 2, 1],  # two s-shells, two p-shells, one d-shell
)

# One .pao file per MD frame directory, ordered by path.
pao_files = glob("2H2O_MD/frame_*/2H2O_pao44-1_0.pao")
pao_files.sort()

In [78]:
# Train one network per atomic kind (Hydrogen, Oxygen) on the first few frames.
# Both trainings share the same settings and differ only in `kind_name`.
train_params = {
    "prim_basis_shells": prim_basis_shells,
    "pao_basis_size": pao_basis_size,
    "pao_files": pao_files[:10],  # number of frames used as training data
    "num_hidden": 1,  # number of hidden layers
    "max_epochs": 100,  # number of training epochs
}

net_H = train_pao_tfn(kind_name="H", **train_params)
net_O = train_pao_tfn(kind_name="O", **train_params)

Training NN for kind H using 40 samples.
Epoch: 0  Loss: 0.190283
Epoch: 20  Loss: 0.00437398
Epoch: 40  Loss: 0.00148966
Epoch: 60  Loss: 0.000669045
Epoch: 80  Loss: 0.000579661
Training NN for kind O using 20 samples.
Epoch: 0  Loss: 0.0764235
Epoch: 20  Loss: 0.00100802
Epoch: 40  Loss: 8.22559e-05
Epoch: 60  Loss: 4.7135e-05
Epoch: 80  Loss: 3.14158e-05


In [79]:
# Use the trained networks to infer xblocks for all frames and calculate their loss.
nets = {"H": net_H, "O": net_O}  # trained network for each atomic kind
xblocks_inferred = []  # per frame: list with one inferred xblock (numpy array) per atom
losses = []  # per frame: sum over atoms of (per-atom loss / number of atoms)

with torch.no_grad():  # pure inference, so no autograd graph is needed
    for fn in pao_files:
        kinds, atom2kind, coords, xblocks = parse_pao_file(fn)
        kind_onehot = encode_kind(atom2kind)
        natoms = coords.shape[0]

        # The network inputs depend only on the frame, not on the atom, so they
        # are built once per frame; [None, ...] prepends an axis of length one.
        kind_onehot_torch = torch.as_tensor(kind_onehot[None, ...])
        coords_torch = torch.as_tensor(coords[None, ...])
        diff_M = difference_matrix(coords_torch)

        # Kind name -> network output for this frame. Each network is evaluated
        # at most once per frame because its inputs are the same for every atom.
        outputs = {}
        frame_xblocks = []
        frame_loss = 0.0
        for iatom in range(natoms):
            kind = atom2kind[iatom]
            if kind not in nets:
                # Without this check an unknown kind would silently reuse the
                # xblock of the previous atom.
                raise ValueError("No trained network for kind {!r} in {}".format(kind, fn))
            if kind not in outputs:
                outputs[kind] = nets[kind](kind_onehot_torch, diff_M)
            # Select the output belonging to this atom via the last axis. The
            # Hydrogen branch previously always decoded index 0 instead of iatom.
            xblock_net = nets[kind].decode_xblock(outputs[kind][0, :, iatom])
            frame_xblocks.append(xblock_net.detach().numpy())
            loss = loss_function(xblock_net, torch.as_tensor(xblocks[iatom])) / natoms
            frame_loss += loss.item()

        xblocks_inferred.append(frame_xblocks)
        losses.append(frame_loss)

fig, ax = plt.subplots()
ax.set_xlabel("Frame")
ax.set_ylabel("Loss")
ax.plot(losses);

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [7]:
# Run CP2K on a few of the inferred frames. DFT Energy difference below 1 milliHartree would be nice.
CP2K_EXE = "/opt/cp2k/exe/local/cp2k.ssmp"
verify_frames = (0, 1, 2, 3, 4, 10, 20)  # indices into pao_files

for iframe, fn in enumerate(pao_files):
    if iframe not in verify_frames:
        continue
    print("Working on: "+fn)
    kinds, atom2kind, coords, xblocks = parse_pao_file(fn)
    frame_dir = path.dirname(fn)
    fn_inferred = frame_dir + "/inferred.pao"
    # Store the inferred xblocks of this frame next to the reference PAO file.
    write_pao_file(fn_inferred, kinds, atom2kind, coords, xblocks_inferred[iframe])
    # Run CP2K inside the frame directory and redirect its stdout to the .out file.
    # check=True raises if CP2K exits with an error, so that a stale or truncated
    # output file is not parsed below; a missing frame directory raises as well
    # instead of running CP2K in the wrong directory.
    with open(path.join(frame_dir, "2H2O_pao44_inferred.out"), "w") as cp2k_out:
        subprocess.run(
            [CP2K_EXE, "-i", "2H2O_pao44_inferred.inp"],
            cwd=frame_dir,
            stdout=cp2k_out,
            check=True,
        )
    # Compare the energy of the reference run with that of the inferred run.
    pao_energy = read_energy(frame_dir+"/2H2O_pao44.out")
    pao_ml_energy = read_energy(frame_dir+"/2H2O_pao44_inferred.out")
    print("Loss: {:g} Energy-diff: {:g} Hartree\n".format(losses[iframe], pao_ml_energy - pao_energy))

Working on: 2H2O_MD/frame_0000/2H2O_pao44-1_0.pao
Loss: 5.10554e-06 Energy-diff: 7.28263e-05 Hartree

Working on: 2H2O_MD/frame_0025/2H2O_pao44-1_0.pao
Loss: 8.89534e-06 Energy-diff: 7.73257e-05 Hartree

Working on: 2H2O_MD/frame_0050/2H2O_pao44-1_0.pao
Loss: 0.00152929 Energy-diff: 0.00194181 Hartree

Working on: 2H2O_MD/frame_0075/2H2O_pao44-1_0.pao
Loss: 0.00585866 Energy-diff: 0.00594532 Hartree

Working on: 2H2O_MD/frame_0100/2H2O_pao44-1_0.pao
Loss: 0.0104333 Energy-diff: 0.0102591 Hartree

Working on: 2H2O_MD/frame_0250/2H2O_pao44-1_0.pao
Loss: 0.0307013 Energy-diff: 0.06961 Hartree

Working on: 2H2O_MD/frame_0500/2H2O_pao44-1_0.pao
Loss: 0.0106711 Energy-diff: 0.0195768 Hartree



In [36]:
! cd  2H2O_MD/frame_0000; /opt/cp2k/exe/local/cp2k.ssmp -i test.inp

 DBCSR| CPU Multiplication driver                                           XSMM
 DBCSR| Multrec recursion limit                                              512
 DBCSR| Multiplication stack size                                           1000
 DBCSR| Maximum elements for images                                    UNLIMITED
 DBCSR| Multiplicative factor virtual images                                   1
 DBCSR| Use multiplication densification                                       T
 DBCSR| Multiplication size stacks                                             3


  **** **** ******  **  PROGRAM STARTED AT               2019-05-07 20:54:44.290
 ***** ** ***  *** **   PROGRAM STARTED ON                          eec608605746
 **    ****   ******    PROGRAM STARTED BY                             <unknown>
 ***** **    ** ** **   PROGRAM PROCESS ID                                    67
  **** **  *******  **  PROGRAM STARTED IN         /workspace/2H2O_MD/frame_0000

 CP2K| version string:   


 *******************************************************************************
 *******************************************************************************
 **                                                                           **
 **     #####                         ##              ##                      **
 **    ##   ##            ##          ##              ##                      **
 **   ##     ##                       ##            ######                    **
 **   ##     ##  ##   ##  ##   #####  ##  ##   ####   ##    #####    #####    **
 **   ##     ##  ##   ##  ##  ##      ## ##   ##      ##   ##   ##  ##   ##   **
 **   ##  ## ##  ##   ##  ##  ##      ####     ###    ##   ######   ######    **
 **    ##  ###   ##   ##  ##  ##      ## ##      ##   ##   ##       ##        **
 **     #######   #####   ##   #####  ##  ##  ####    ##    #####   ##        **
 **           ##                                                    ##        **
 **           


 PW_GRID| Information for grid number                                          1
 PW_GRID| Cutoff [a.u.]                                                    200.0
 PW_GRID| spherical cutoff:                                                   NO
 PW_GRID|   Bounds   1            -50      49                Points:         100
 PW_GRID|   Bounds   2            -50      49                Points:         100
 PW_GRID|   Bounds   3            -50      49                Points:         100
 PW_GRID| Volume element (a.u.^3)  0.3455E-02     Volume (a.u.^3)      3455.1473
 PW_GRID| Grid span                                                    FULLSPACE

 PW_GRID| Information for grid number                                          2
 PW_GRID| Cutoff [a.u.]                                                     66.7
 PW_GRID| spherical cutoff:                                                   NO
 PW_GRID|   Bounds   1            -30      29                Points:          60
 PW_GRID|   Bounds   2    

 Energy with the initial guess:       -34.130489363

     Est. extremal eigenvalues     -1.07108     3.75655  converged:  T
     TRS4 it   1 1.00000000   0.179E+01       0.001      117.484   0.100E-07
     TRS4 it   2 1.00000000   0.156E+01       0.001      117.484   0.100E-07
     TRS4 it   3 1.00000000   0.892E+00       0.001      160.007   0.100E-07
     TRS4 it   4 1.00000000   0.555E+00       0.001      165.283   0.100E-07
     TRS4 it   5 1.00000000   0.330E+00       0.002      154.673   0.100E-07
     TRS4 it   6 1.00000000   0.150E+00       0.001      167.339   0.100E-07
     TRS4 it   7 1.00000000   0.414E-01       0.001      167.801   0.100E-07
     TRS4 it   8 1.00000000   0.408E-02       0.013       17.602   0.100E-07
     TRS4 it   9 1.00000000   0.501E-04       0.002      154.662   0.100E-07
     TRS4 it  10 1.00000000   0.763E-08       0.001      158.243   0.100E-07
     Final TRS4 iteration   10 1.00000000   0.763E-08
     Chemical potential (mu):      -0.22788

 SCF   


 SCF     7       -34.450480278        -0.000001150    1.655262

*********************************************************
 Using MIXING_FRACTION=0.450 to mix KS matrix:  iscf=  8
 KS_nw=0.450*KS + 0.550*KS_old
*********************************************************
     Est. extremal eigenvalues     -0.93775     3.89110  converged:  T
     TRS4 it   1 1.00000000   0.176E+01       0.001      117.484   0.100E-07
     TRS4 it   2 1.00000000   0.159E+01       0.001      117.484   0.100E-07
     TRS4 it   3 1.00000000   0.950E+00       0.002      153.046   0.100E-07
     TRS4 it   4 1.00000000   0.594E+00       0.002      145.294   0.100E-07
     TRS4 it   5 1.00000000   0.396E+00       0.001      166.455   0.100E-07
     TRS4 it   6 1.00000000   0.227E+00       0.001      165.636   0.100E-07
     TRS4 it   7 1.00000000   0.104E+00       0.002      151.203   0.100E-07
     TRS4 it   8 1.00000000   0.300E-01       0.002      155.600   0.100E-07
     TRS4 it   9 1.00000000   0.371E-02     