-
Notifications
You must be signed in to change notification settings - Fork 75
/
nvt.py
executable file
·471 lines (413 loc) · 20.5 KB
/
nvt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
#!/usr/bin/env python
#--- coding: utf-8 ---
"""
@package nvt
Runs a simulation to compute condensed phase properties (for example, the density
or the enthalpy of vaporization) and compute the derivative with respect
to changing the force field parameters. This script is a part of ForceBalance.
The algorithm used to compute the surface tension is the test-area method.
We run a NVT simulation on a film of water in a long PBC box. For each frame, we
increase and decrease the area of the xy surface by ΔS but keep the volumn constant,
and compute the energy change ΔU+ and UΔ-, then the surface tension is computed by:
β = 1/kT
γ = -1/(2βΔS) * [ ln<exp(-ΔE+/kT)> - ln<exp(-ΔE-/kT)> ]
References
[1] Vega C, Miguel E. de. Surface tension of the most popular models of water by
using the test-area simulation method JCP 126:154707, 2007.
The partial derivatives of surface tension with respect to forcefield parameters
are computed using the analytic formula:
β = 1/kT
∂γ/∂α = 1/(2ΔS) * [ <(∂E+/∂α - ∂E/∂α) exp(-βΔE+)> / <exp(-βΔE+)>
-<(∂E-/∂α - ∂E/∂α) exp(-βΔE-)> / <exp(-βΔE-)> ]
Copyright And License
@author Yudong Qiu <qiu@ucdavis.edu>
@author Lee-Ping Wang <leeping@ucdavis.edu>
Note: Many parts of this script is directly copied from npt.py
All code in this repository is released under the BSD 3-Clause License (aka BSD 2.0).
Please see github.com/leeping/forcebalance for more details.
"""
from __future__ import division
#==================#
#| Global Imports |#
#==================#
from builtins import range
import os
import sys
import glob
import shutil
import argparse
import traceback
import numpy as np
from copy import deepcopy
from collections import namedtuple, OrderedDict
from forcebalance.forcefield import FF
from forcebalance.nifty import col, flat, lp_dump, lp_load, printcool, printcool_dictionary, statisticalInefficiency, which, _exec, isint, wopen, click
from forcebalance.finite_difference import fdwrap, f1d2p, f12d3p, f1d7p, in_fd
from forcebalance.molecule import Molecule
from forcebalance.output import getLogger
logger = getLogger(__name__)
#========================================================#
#| Global, user-tunable variables (simulation settings) |#
#========================================================#
parser = argparse.ArgumentParser("nvt simulation tool for computing surface tension and derivatives.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('engine', choices=['openmm'], help='MD program that we are using.')
parser.add_argument('temperature', type=float, help='Temperature (K)')
args = parser.parse_args()
faststep = 0.25 # "fast" timestep (for MTS integrator, if used)
temperature = args.temperature # temperature in kelvin
engname = args.engine.lower() # Name of the engine
if engname == "openmm":
try:
from simtk.unit import *
from simtk.openmm import *
from simtk.openmm.app import *
except:
traceback.print_exc()
raise Exception("Cannot import OpenMM modules")
from forcebalance.openmmio import *
Engine = OpenMM
#elif engname == "gromacs" or engname == "gmx":
# from forcebalance.gmxio import *
# Engine = GMX
#elif engname == "tinker":
# from forcebalance.tinkerio import *
# Engine = TINKER
else:
raise Exception('Only OpenMM is supported at this time.')
#==================#
#| Subroutines |#
#==================#
def mean_stderr(ts):
""" Get mean and standard deviation of a time series. """
return np.mean(ts), np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts))
def bzavg(obs,boltz):
""" Get the Boltzmann average of an observable. """
if obs.ndim == 2:
if obs.shape[0] == len(boltz) and obs.shape[1] == len(boltz):
raise Exception('Error - both dimensions have length equal to number of snapshots, now confused!')
elif obs.shape[0] == len(boltz):
return np.sum(obs*boltz.reshape(-1,1),axis=0)/np.sum(boltz)
elif obs.shape[1] == len(boltz):
return np.sum(obs*boltz,axis=1)/np.sum(boltz)
else:
raise Exception('The dimensions are wrong!')
elif obs.ndim == 1:
return np.dot(obs,boltz)/sum(boltz)
else:
raise Exception('The number of dimensions can only be 1 or 2!')
def PrintEDA(EDA, N):
# Get energy decomposition statistics.
PrintDict = OrderedDict()
for key, val in EDA.items():
val_avg, val_err = mean_stderr(val)
if val_avg == 0.0: continue
if val_err == 0.0: continue
PrintDict[key] = "% 12.4f +- %10.4f [ % 9.4f +- %7.4f ]" % (val_avg, val_err, val_avg/N, val_err/N)
printcool_dictionary(PrintDict, "Energy Component Analysis, Mean +- Stderr [Per Molecule] (kJ/mol)")
#=============================================#
#| Functions for differentiating energy |#
#| and properties |#
#=============================================#
def energy_derivatives(engine, FF, mvals, h, pgrad, length, AGrad=True, dipole=False):
"""
Compute the first and second derivatives of a set of snapshot
energies with respect to the force field parameters.
This basically calls the finite difference subroutine on the
energy_driver subroutine also in this script.
@param[in] mvals Mathematical parameter values
@param[in] h Finite difference step size
@param[in] phase The phase (liquid, gas) to perform the calculation on
@param[in] AGrad Switch to turn derivatives on or off; if off, return all zeros
@param[in] dipole Switch for dipole derivatives.
@return G First derivative of the energies in a N_param x N_coord array
@return GDx First derivative of the box dipole moment x-component in a N_param x N_coord array
@return GDy First derivative of the box dipole moment y-component in a N_param x N_coord array
@return GDz First derivative of the box dipole moment z-component in a N_param x N_coord array
"""
G = np.zeros((FF.np,length))
GDx = np.zeros((FF.np,length))
GDy = np.zeros((FF.np,length))
GDz = np.zeros((FF.np,length))
if not AGrad:
return G, GDx, GDy, GDz
def energy_driver(mvals_):
FF.make(mvals_)
if dipole:
return engine.energy_dipole()
else:
return engine.energy()
ED0 = energy_driver(mvals)
for i in pgrad:
logger.info("%i %s\r" % (i, (FF.plist[i] + " "*30)))
EDG, _ = f12d3p(fdwrap(energy_driver,mvals,i),h,f0=ED0)
if dipole:
G[i,:] = EDG[:,0]
GDx[i,:] = EDG[:,1]
GDy[i,:] = EDG[:,2]
GDz[i,:] = EDG[:,3]
else:
G[i,:] = EDG[:]
#reset FF parameters
FF.make(mvals)
return G, GDx, GDy, GDz
def property_derivatives(engine, FF, mvals, h, pgrad, kT, property_driver, property_kwargs, AGrad=True):
"""
Function for double-checking property derivatives. This function is called to perform
a more explicit numerical derivative of the property, rather than going through the
fluctuation formula. It takes longer and is potentially less precise, which means
it's here mainly as a sanity check.
@param[in] mvals Mathematical parameter values
@param[in] h Finite difference step size
@param[in] phase The phase (liquid, gas) to perform the calculation on
@param[in] property_driver The function that calculates the property
@param[in] property_driver A dictionary of arguments that goes into calculating the property
@param[in] AGrad Switch to turn derivatives on or off; if off, return all zeros
@return G First derivative of the property
"""
G = np.zeros(FF.np)
if not AGrad:
return G
def energy_driver(mvals_):
FF.make(mvals_)
return engine.energy_dipole()
ED0 = energy_driver(mvals)
E0 = ED0[:,0]
D0 = ED0[:,1:]
P0 = property_driver(None, **property_kwargs)
if 'h_' in property_kwargs:
H0 = property_kwargs['h_'].copy()
for i in pgrad:
logger.info("%s\n" % (FF.plist[i] + " "*30))
ED1 = fdwrap(energy_driver,mvals,i)(h)
E1 = ED1[:,0]
D1 = ED1[:,1:]
b = np.exp(-(E1-E0)/kT)
b /= np.sum(b)
if 'h_' in property_kwargs:
property_kwargs['h_'] = H0.copy() + (E1-E0)
if 'd_' in property_kwargs:
property_kwargs['d_'] = D1.copy()
S = -1*np.dot(b,np.log(b))
InfoContent = np.exp(S)
if InfoContent / len(E0) < 0.1:
logger.warn("Warning: Effective number of snapshots: % .1f (out of %i)\n" % (InfoContent, len(E0)))
P1 = property_driver(b=b,**property_kwargs)
EDM1 = fdwrap(energy_driver,mvals,i)(-h)
EM1 = EDM1[:,0]
DM1 = EDM1[:,1:]
b = np.exp(-(EM1-E0)/kT)
b /= np.sum(b)
if 'h_' in property_kwargs:
property_kwargs['h_'] = H0.copy() + (EM1-E0)
if 'd_' in property_kwargs:
property_kwargs['d_'] = DM1.copy()
S = -1*np.dot(b,np.log(b))
InfoContent = np.exp(S)
if InfoContent / len(E0) < 0.1:
logger.warn("Warning: Effective number of snapshots: % .1f (out of %i)\n" % (InfoContent, len(E0)))
PM1 = property_driver(b=b,**property_kwargs)
G[i] = (P1-PM1)/(2*h)
if 'h_' in property_kwargs:
property_kwargs['h_'] = H0.copy()
if 'd_' in property_kwargs:
property_kwargs['d_'] = D0.copy()
return G
def main():
"""
Usage: (runcuda.sh) nvt.py <openmm|gromacs|tinker> <liquid_nsteps> <liquid_timestep (fs)> <liquid_intvl (ps> <temperature>
This program is meant to be called automatically by ForceBalance on
a GPU cluster (specifically, subroutines in openmmio.py). It is
not easy to use manually. This is because the force field is read
in from a ForceBalance 'FF' class.
"""
printcool("ForceBalance condensed phase NVT simulation using engine: %s" % engname.upper(), color=4, bold=True)
#----
# Load the ForceBalance pickle file which contains:
#----
# - Force field object
# - Optimization parameters
# - Options from the Target object that launched this simulation
# - Switch for whether to evaluate analytic derivatives.
FF,mvals,TgtOptions,AGrad = lp_load('forcebalance.p')
FF.ffdir = '.'
# Write the force field file.
FF.make(mvals)
#----
# Load the options that are set in the ForceBalance input file.
#----
# Finite difference step size
h = TgtOptions['h']
pgrad = TgtOptions['pgrad']
# MD options; time step (fs), production steps, equilibration steps, interval for saving data (ps)
nvt_timestep = TgtOptions['nvt_timestep']
nvt_md_steps = TgtOptions['nvt_md_steps']
nvt_eq_steps = TgtOptions['nvt_eq_steps']
nvt_interval = TgtOptions['nvt_interval']
liquid_fnm = TgtOptions['nvt_coords']
# Number of threads, multiple timestep integrator, anisotropic box etc.
threads = TgtOptions.get('md_threads', 1)
mts = TgtOptions.get('mts_integrator', 0)
rpmd_beads = TgtOptions.get('rpmd_beads', 0)
force_cuda = TgtOptions.get('force_cuda', 0)
nbarostat = TgtOptions.get('n_mcbarostat', 25)
anisotropic = TgtOptions.get('anisotropic_box', 0)
minimize = TgtOptions.get('minimize_energy', 1)
# Print all options.
printcool_dictionary(TgtOptions, title="Options from ForceBalance")
nvt_snapshots = int((nvt_timestep * nvt_md_steps / 1000) / nvt_interval)
nvt_iframes = int(1000 * nvt_interval / nvt_timestep)
logger.info("For the condensed phase system, I will collect %i snapshots spaced apart by %i x %.3f fs time steps\n" \
% (nvt_snapshots, nvt_iframes, nvt_timestep))
if nvt_snapshots < 2:
raise Exception('Please set the number of liquid time steps so that you collect at least two snapshots (minimum %i)' \
% (2000 * (nvt_interval/nvt_timestep)))
#----
# Loading coordinates
#----
ML = Molecule(liquid_fnm, toppbc=True)
# Determine the number of molecules in the condensed phase coordinate file.
NMol = len(ML.molecules)
TgtOptions['n_molecules'] = NMol
logger.info("There are %i molecules in the liquid\n" % (NMol))
#----
# Setting up MD simulations
#----
EngOpts = OrderedDict()
EngOpts["liquid"] = OrderedDict([("coords", liquid_fnm), ("mol", ML), ("pbc", True)])
if "nonbonded_cutoff" in TgtOptions:
EngOpts["liquid"]["nonbonded_cutoff"] = TgtOptions["nonbonded_cutoff"]
else:
largest_available_cutoff = min(ML.boxes[0][:3]) / 2 - 0.1
EngOpts["liquid"]["nonbonded_cutoff"] = largest_available_cutoff
logger.info("nonbonded_cutoff is by default set to the largest available value: %g Angstrom" %largest_available_cutoff)
if "vdw_cutoff" in TgtOptions:
EngOpts["liquid"]["vdw_cutoff"] = TgtOptions["vdw_cutoff"]
# Hard Code nonbonded_cutoff to 13A for test
#EngOpts["liquid"]["nonbonded_cutoff"] = EngOpts["liquid"]["vdw_cutoff"] = 13.0
GenOpts = OrderedDict([('FF', FF)])
if engname == "openmm":
# OpenMM-specific options
EngOpts["liquid"]["platname"] = TgtOptions.get("platname", 'CUDA')
if force_cuda:
try: Platform.getPlatformByName('CUDA')
except: raise RuntimeError('Forcing failure because CUDA platform unavailable')
EngOpts["liquid"]["platname"] = 'CUDA'
if threads > 1: logger.warn("Setting the number of threads will have no effect on OpenMM engine.\n")
EngOpts["liquid"].update(GenOpts)
for i in EngOpts:
printcool_dictionary(EngOpts[i], "Engine options for %s" % i)
# Set up MD options
MDOpts = OrderedDict()
MDOpts["liquid"] = OrderedDict([("nsteps", nvt_md_steps), ("timestep", nvt_timestep),
("temperature", temperature),
("nequil", nvt_eq_steps), ("minimize", minimize),
("nsave", int(1000 * nvt_interval / nvt_timestep)),
("verbose", True),
('save_traj', TgtOptions['save_traj']),
("threads", threads),
("mts", mts), ("rpmd_beads", rpmd_beads), ("faststep", faststep)])
# Energy components analysis disabled for OpenMM MTS because it uses force groups
if (engname == "openmm" and mts): logger.warn("OpenMM with MTS integrator; energy components analysis will be disabled.\n")
# Create instances of the MD Engine objects.
Liquid = Engine(name="liquid", **EngOpts["liquid"])
#=================================================================#
# Run the simulation for the full system and analyze the results. #
#=================================================================#
printcool("Condensed phase NVT molecular dynamics", color=4, bold=True)
click()
prop_return = Liquid.molecular_dynamics(**MDOpts["liquid"])
logger.info("Liquid phase MD simulation took %.3f seconds\n" % click())
Potentials = prop_return['Potentials']
#============================================#
# Compute the potential energy derivatives. #
#============================================#
# if AGrad:
# logger.info("Calculating potential energy derivatives with finite difference step size: %f\n" % h)
# # Switch for whether to compute the derivatives two different ways for consistency.
# FDCheck = False
# printcool("Condensed phase energy and dipole derivatives\nInitializing array to length %i" % len(Potentials), color=4, bold=True)
# click()
# G, GDx, GDy, GDz = energy_derivatives(Liquid, FF, mvals, h, pgrad, len(Potentials), AGrad, dipole=False)
# logger.info("Condensed phase energy derivatives took %.3f seconds\n" % click())
#==============================================#
# Condensed phase properties and derivatives. #
#==============================================#
# Physical constants
kB = 0.008314472471220214
T = temperature
kT = kB * T # Unit: kJ/mol
#--- Surface Tension ----
logger.info("Start Computing surface tension.\n")
perturb_proportion = 0.0005
box_vectors = np.array(Liquid.xyz_omms[0][1]/nanometer) # obtain original box vectors from first frame
delta_S = np.sqrt(np.sum(np.cross(box_vectors[0], box_vectors[1])**2)) * perturb_proportion * 2 # unit: nm^2. *2 for 2 surfaces
# perturb xy area +
click()
scale_x = scale_y = np.sqrt(1 + perturb_proportion)
scale_z = 1.0 / (1+perturb_proportion) # keep the box volumn while changing the area of xy plane
Liquid.scale_box(scale_x, scale_y, scale_z)
logger.info("scale_box+ took %.3f seconds\n" %click())
# Obtain energies and gradients
Potentials_plus = Liquid.energy()
logger.info("Calculation of energies for perturbed box+ took %.3f seconds\n" %click())
if AGrad:
G_plus, _, _, _ = energy_derivatives(Liquid, FF, mvals, h, pgrad, len(Potentials), AGrad, dipole=False)
logger.info("Calculation of energy gradients for perturbed box+ took %.3f seconds\n" %click())
# perturb xy area - ( Note: also need to cancel the previous scaling)
scale_x = scale_y = np.sqrt(1 - perturb_proportion) * (1.0/scale_x)
scale_z = 1.0 / (1-perturb_proportion) * (1.0/scale_z)
Liquid.scale_box(scale_x, scale_y, scale_z)
logger.info("scale_box- took %.3f seconds\n" %click())
# Obtain energies and gradients
Potentials_minus = Liquid.energy()
logger.info("Calculation of energies for perturbed box- took %.3f seconds\n" %click())
if AGrad:
G_minus, _, _, _ = energy_derivatives(Liquid, FF, mvals, h, pgrad, len(Potentials), AGrad, dipole=False)
logger.info("Calculation of energy gradients for perturbed box- took %.3f seconds\n" %click())
# Compute surface tension
dE_plus = Potentials_plus - Potentials # Unit: kJ/mol
dE_minus = Potentials_minus - Potentials # Unit: kJ/mol
prefactor = -0.5 * kT / delta_S / 6.0221409e-1 # Unit mJ m^-2
# Following equation: γ = -kT/(2ΔS) * [ ln<exp(-ΔE+/kT)> - ln<exp(-ΔE-/kT)> ]
#plus_avg, plus_err = mean_stderr(np.exp(-dE_plus/kT))
#minus_avg, minus_err = mean_stderr(np.exp(-dE_minus/kT))
#surf_ten = -0.5 * kT / delta_S * ( np.log(plus_avg) - np.log(minus_avg) ) / 6.0221409e-1 # convert to mJ m^-2
#surf_ten_err = 0.5 * kT / delta_S * ( np.log(plus_avg+plus_err) - np.log(plus_avg-plus_err) + np.log(minus_avg+minus_err) - np.log(minus_avg-minus_err) ) / 6.0221409e-1
exp_dE_plus = np.exp(-dE_plus/kT)
exp_dE_minus = np.exp(-dE_minus/kT)
surf_ten = prefactor * ( np.log(np.mean(exp_dE_plus)) - np.log(np.mean(exp_dE_minus)) )
# Use bootstrap method to estimate the error
num_frames = len(exp_dE_plus)
numboots = 1000
surf_ten_boots = np.zeros(numboots)
for i in range(numboots):
boots_ordering = np.random.randint(num_frames, size=num_frames)
boots_exp_dE_plus = np.take(exp_dE_plus, boots_ordering)
boots_exp_dE_minus = np.take(exp_dE_minus, boots_ordering)
surf_ten_boots[i] = prefactor * ( np.log(np.mean(boots_exp_dE_plus)) - np.log(np.mean(boots_exp_dE_minus)) )
surf_ten_err = np.std(surf_ten_boots) * np.sqrt(np.mean([statisticalInefficiency(exp_dE_plus), statisticalInefficiency(exp_dE_minus)]))
printcool("Surface Tension: % .4f +- %.4f mJ m^-2" % (surf_ten, surf_ten_err))
# Analytic Gradient of surface tension
# Formula: β = 1/kT
# ∂γ/∂α = 1/(2ΔS) * [ <(∂E+/∂α - ∂E/∂α) exp(-βΔE+)> / <exp(-βΔE+)>
# -<(∂E-/∂α - ∂E/∂α) exp(-βΔE-)> / <exp(-βΔE-)> ]
n_params = len(mvals)
G_surf_ten = np.zeros(n_params)
if AGrad:
beta = 1.0 / kT
plus_denom = np.mean(np.exp(-beta*dE_plus))
minus_denom = np.mean(np.exp(-beta*dE_minus))
prefactor = 0.5 / delta_S / 6.0221409e-1 # Unit mJ m^-2
for param_i in range(n_params):
plus_numer = np.mean( G_plus[param_i] * np.exp(-beta*dE_plus) )
minus_numer = np.mean( G_minus[param_i] * np.exp(-beta*dE_minus) )
G_surf_ten[param_i] = prefactor * (plus_numer/plus_denom - minus_numer/minus_denom)
printcool("Analytic Derivatives:")
FF.print_map(vals=G_surf_ten)
logger.info("Writing final force field.\n")
pvals = FF.make(mvals)
logger.info("Writing all results to disk.\n")
result_dict = {'surf_ten': surf_ten, 'surf_ten_err': surf_ten_err, 'G_surf_ten': G_surf_ten}
lp_dump(result_dict, 'nvt_result.p')
if __name__ == "__main__":
main()