# Thermodynamics of Thiocyanate Anions in Water and at the Air-Water Interface - Simulation (SI)

## Prerequisites: openmm, mdtraj, packmol
- conda config --add channels omnia
- conda install -c omnia openmm
- https://simtk.org/api_docs/openmm/api6_1/python/index.html
- http://docs.openmm.org/7.0.0/userguide/index.html
- http://docs.openmm.org/7.0.0/api-python/index.html
- conda install -c omnia mdtraj
- http://mdtraj.org/1.7.2/
- install packmol with `./configure` and `make`; the Packmol cell below looks for the executable at `PACKMOL_PATH` (`../packmol/packmol`, relative to the working directory)
- http://www.ime.unicamp.br/~martinez/packmol/home.shtml

## Import modules

In [2]:
%matplotlib inline
from __future__ import division, print_function
import shutil
from io import StringIO
from math import pi
from scipy import integrate
from IPython.display import display, Math, Latex
import numpy as np
import mdtraj as md
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16})
import os 

## Parameter setup
In the next cell we collect the parameters used in the simulation in a single dictionary, `d`.

In [3]:
# Root directory of the project. Later cells build paths by plain string
# concatenation (WORKDIR+'data/...'), so the value must end with '/'.
WORKDIR = '/home/vidar/playground/'#'/lunarc/nobackup/users/eko12vas/'
# Several cells write their temporary files relative to the current directory.
os.chdir(WORKDIR)
print("Current directory:", os.getcwd())

# simulation parameters: more to be added

# Dictionary, atom and system properties
d = {
# Used as a directory name and as the base name of the force-field XML file.
'Forcefield name': 'ff_our',
# Used as a directory name only; the generated run script hard-codes model='spce'.
'Water model': 'spce',
# Edge length of the cubic box; handed to Packmol ('inside box') and to
# Modeller.addSolvent, where it is multiplied by `angstroms`.
'Box size': 25,
# The two concentration lists are iterated pairwise with zip(): the input
# (molar) value fixes the number of ions via conc2num, the molal value labels
# the output directory and files.
'Input concentrations':    [0.445, 1.39], 
'Molal concentrations':    [0.5  , 2.0 ],  
# Bonded parameters; inserted verbatim into the <HarmonicBondForce> and
# <HarmonicAngleForce> sections of the force-field XML.
# NOTE(review): presumably in OpenMM's XML units (nm, kJ/mol/nm^2, rad,
# kJ/mol/rad^2) - confirm.
'Bond length':             {'SC':0.169500,         'CN':0.115000,             'OH':0.10000}, 
'Harmonic bond constant':  {'SC':252929.5,         'CN':998590.3,             'OH':345000}, 
'Bond angle':              {'SCN':3.13810199508,   'HOH':1.91061193216}, 
'Harmonic angle constant': {'SCN':698.56,          'HOH':383}, 
# Only the cation mass is read from this table; the S, C, N, O and H masses are
# hard-coded in the XML template of the force-field cell.
'Mass':           {'S':32.0660, 'C':12.0110, 'N':14.0067,                          'Na':22.9898, 'K':39.0983, 'Cl':35.453,  'I':126.90 },
# Non-bonded parameters; written as charge / sigma / epsilon of <NonbondedForce>.
# NOTE(review): presumably e, nm and kJ/mol respectively - confirm.
'Partial charge': {'S':-0.573,  'C':0.483,   'N':-0.91,   'O':-0.8476, 'H':0.4238, 'Na':1.000,   'K':1.000,   'Cl':-1.000,  'I':-1.000 },  
'Size':           {'S':0.383,   'C':0.335,   'N':0.37,    'O':0.3166,  'H':0,      'Na':0.255,   'K':0.403,   'Cl':0.43900, 'I':0.491  },  
'Well depth':     {'S':1.523,   'C':0.425,   'N':0.310,   'O':0.650,   'H':0,      'Na':0.28,    'K':0.85,    'Cl':0.41600, 'I':0.158  },
# 'Simulation' is the number of integration steps; 'Report' is the interval (in
# steps) of the DCD and state-data reporters.
'Steps':          {'Simulation':10000, 'Report':1000},
# 'A' is the Avogadro constant; 'Unit converter' scales the box edge before it
# is cubed in conc2num.
# NOTE(review): presumably angstrom -> dm (1 A = 1e-9 dm), so that the cubed
# edge is a volume in litres - confirm.
'Constants':      {'A':6.022*pow(10,23),  'Unit converter':pow(10,-9)}}

# `cation` selects the ion's row in the parameter tables and names its residue,
# atom and class in the XML; `cation_name` becomes its atom type name.
cation = 'Na'
cation_name = 'sodium'

# `anion` only enters the directory name (lower-cased) in the visible cells;
# `anion_name` is not used by them.
anion = 'SCN'
anion_name = 'thiocyanate'

def conc2num(conc,box_size):
    """Convert a molar concentration into a particle count for a cubic box.

    The box edge is scaled by d['Constants']['Unit converter'], cubed to give
    the box volume, and multiplied by the concentration and the Avogadro
    constant d['Constants']['A'].

    conc     -- concentration (molar)
    box_size -- edge length of the cubic box, in the units of d['Box size']

    Returns the particle count rounded to the nearest whole number.
    """
    constants = d['Constants']
    box_volume = pow(box_size*constants['Unit converter'], 3)
    return round(conc*box_volume*constants['A'])

Current directory: /home/vidar/playground


## Creating folder tree
In the next cell we create directories that will store the files needed for simulation and the output files.

In [11]:
# Create the output tree
#   <WORKDIR>/data/<force field>/<water model>/<cation><anion>/<molality>m/
# with one leaf directory per molal concentration.
#
# The tree is anchored at WORKDIR rather than at the current directory so that
# it matches the WORKDIR+'data/...' paths used by every later cell, even when
# this cell is re-run after the working directory has been changed.
salt_dir = os.path.join(WORKDIR, 'data', d['Forcefield name'], d['Water model'],
                        cation.lower()+anion.lower())

# makedirs creates all missing intermediate levels in one call.
if not os.path.isdir(salt_dir):
    os.makedirs(salt_dir)

for conc_m in d['Molal concentrations']:
    conc_dir = os.path.join(salt_dir, str(conc_m)+'m')
    if not os.path.isdir(conc_dir):
        os.mkdir(conc_dir)

## Using Packmol to generate input file
Here, we use Packmol to place n thiocyanate ions at random positions in a cubic box. The number n is computed by `conc2num` from the input (molar) concentration and the box size defined in the dictionary.

In [12]:
print('Current working directory: ', os.getcwd())

# Directory holding the packmol executable, relative to the current directory.
PACKMOL_PATH = '../packmol/'

# Packmol input template. Placeholders, in order: tolerance, output PDB name,
# single-ion structure PDB name, number of ions, box upper corner (x, y, z).
PACKMOL_INPUT_TEMPLATE = """ 
# Mixture 

tolerance %f
filetype pdb
output %s
add_amber_ter

structure %s
  number %d 
  inside box 0. 0. 0. %f %f %f
end structure
"""

# PDB template for a single SCN ion. Placeholders: the three box edge lengths.
# The CRYST1 record is fixed-column, hence the %9.3f fields with no separators.
SCN_PDB_TEMPLATE = """CRYST1%9.3f%9.3f%9.3f  90.00  90.00  90.00 P 1           1
HETATM    1  S   SCN A   1      20.000  20.000  20.000  1.00  0.00           S
HETATM    2  C   SCN A   1      20.000  20.000  21.670  1.00  0.00           C
HETATM    3  N   SCN A   1      20.000  20.000  22.760  1.00  0.00           N
TER       4      SCN A   1   
END"""

# NOTE: zip() stops at the shorter list, so both concentration lists must have
# the same length for every concentration to be processed.
for (conc_input, conc_m) in zip(d['Input concentrations'], d['Molal concentrations']):

    wdir = WORKDIR+'data/'+d['Forcefield name']+'/'+d['Water model']+'/'+cation.lower()+anion.lower()+'/'+str(conc_m)+'m/'

    n_scn = conc2num(conc_input, d['Box size'])
    box_pdb_name = str(conc_m)+'m_box.pdb'
    scn_pdb_name = str(conc_m)+'m_scn.pdb'

    print('Concentration input:', conc_input, ' M results in', n_scn, 'SCN-ions')

    # write input file for packmol
    PACKMOL_INPUT = PACKMOL_INPUT_TEMPLATE % (1, box_pdb_name, scn_pdb_name, n_scn, d['Box size'], d['Box size'], d['Box size'])
    with open('packmol_input_scn.txt', 'w') as file_handle:
        file_handle.write(PACKMOL_INPUT)

    # write pdb file for single SCN
    scn_pdb = SCN_PDB_TEMPLATE % (d['Box size'], d['Box size'], d['Box size'])
    with open(scn_pdb_name, 'w') as text_file:
        text_file.write(scn_pdb)

    # use packmol to create a system of n_scn randomly placed scn molecules
    status = os.system("%s < %s" % (PACKMOL_PATH+'packmol', 'packmol_input_scn.txt'))

    # Fail here, with a clear message, rather than in shutil.move below.
    if status != 0 or not os.path.isfile(box_pdb_name):
        raise RuntimeError("packmol failed (exit status %s) or did not write %s"
                           % (status, box_pdb_name))

    # move created files to the dedicated folder
    for src_filename in [scn_pdb_name, box_pdb_name, 'packmol_input_scn.txt']:
        dst_filename = os.path.join(wdir, os.path.basename(src_filename))
        shutil.move(src_filename, dst_filename)

Current working directory:  /home/vidar/playground
Concentration input: 1.39  M results in 13 SCN-ions


## Writing the force field file
Here, the force field file is created. Some parameters, like the mass of atoms, are given values directly in the file since those are fixed in our study. Others, like the Lennard-Jones parameters, are retrieved from the dictionary created before.  

In [13]:
# --- XML templates -----------------------------------------------------------
# Atom types and residue templates. The cation entries are placeholders; the
# thiocyanate and water entries are written out literally.
FFXML_topology = """
<ForceField>

 <AtomTypes>
  <Type name="scn-S" class="SS" element="S" mass="32.0660"/>
  <Type name="scn-C" class="CS" element="C" mass="12.0110"/>
  <Type name="scn-N" class="NS" element="N" mass="14.0067"/>
  <Type name="%s" class="%s" element="%s" mass="%f"/>
  <Type name="spce-O" class="OW" element="O" mass="15.99943"/>
  <Type name="spce-H" class="HW" element="H" mass="1.007947"/>

 </AtomTypes>

 <Residues>
  <Residue name="SCN">
   <Atom name="S" type="scn-S"/>
   <Atom name="C" type="scn-C"/>
   <Atom name="N" type="scn-N"/>
   <Bond from="0" to="1"/>
   <Bond from="1" to="2"/>
  </Residue>

  <Residue name="HOH">
   <Atom name="O" type="spce-O"/>
   <Atom name="H1" type="spce-H"/>
   <Atom name="H2" type="spce-H"/>
   <Bond from="0" to="1"/>
   <Bond from="0" to="2"/>
  </Residue>

  <Residue name="%s">
   <Atom name="%s" type="%s"/>
  </Residue>

 </Residues>"""

# Harmonic bonds (S-C, C-N, O-H) and angles (S-C-N, H-O-H).
FFXML_bonded = """
 <HarmonicBondForce>
  <Bond class1="SS" class2="CS" length="%f" k="%f"/>
  <Bond class1="CS" class2="NS" length="%f" k="%f"/>
  <Bond class1="OW" class2="HW" length="%f" k="%f"/>
 </HarmonicBondForce>

 <HarmonicAngleForce>    
  <Angle class1="SS" class2="CS" class3="NS" angle="%f" k="%f"/>
  <Angle class1="HW" class2="OW" class3="HW" angle="%f" k="%f"/>
 </HarmonicAngleForce>"""

# Charge and Lennard-Jones parameters, one line per atom type; cation last.
FFXML_nonbonded = """
 <NonbondedForce coulomb14scale="0.5" lj14scale="0.5">
  <Atom type="scn-S" charge="%f" sigma="%f" epsilon="%f"/>
  <Atom type="scn-C" charge="%f" sigma="%f" epsilon="%f"/>
  <Atom type="scn-N" charge="%f" sigma="%f" epsilon="%f"/>
  <Atom type="spce-O" charge="%f" sigma="%f" epsilon="%f"/>
  <Atom type="spce-H" charge="%f" sigma="%f" epsilon="%f"/>
  <Atom type="%s" charge="%f" sigma="%f" epsilon="%f"/>
 </NonbondedForce>

</ForceField>"""


def _ff_values(tables, keys):
    """Return, for each key in turn, its entry from each of the `d` tables."""
    return tuple(d[table][key] for key in keys for table in tables)


# --- fill in the templates ---------------------------------------------------
FFXML_topology = FFXML_topology % (
    cation_name, cation, cation, d['Mass'][cation],  # <Type> of the cation
    cation, cation, cation_name)                     # <Residue> of the cation

FFXML_bonded = FFXML_bonded % (
    _ff_values(('Bond length', 'Harmonic bond constant'), ('SC', 'CN', 'OH'))
    + _ff_values(('Bond angle', 'Harmonic angle constant'), ('SCN', 'HOH')))

_nonbonded_tables = ('Partial charge', 'Size', 'Well depth')
FFXML_nonbonded = FFXML_nonbonded % (
    _ff_values(_nonbonded_tables, ('S', 'C', 'N', 'O', 'H'))
    + (cation_name,)
    + _ff_values(_nonbonded_tables, (cation,)))

FFXML = ''.join([FFXML_topology, FFXML_bonded, FFXML_nonbonded])

# --- one copy of the force field per concentration directory -----------------
for conc_m in d['Molal concentrations']:
    wdir = WORKDIR+'data/'+d['Forcefield name']+'/'+d['Water model']+'/'+cation.lower()+anion.lower()+'/'+str(conc_m)+'m/'
    xml_path = wdir+str(d['Forcefield name'])+'.xml'
    with open(xml_path, 'w') as xml_file:
        xml_file.write(FFXML)

print(FFXML)


<ForceField>

 <AtomTypes>
  <Type name="scn-S" class="SS" element="S" mass="32.0660"/>
  <Type name="scn-C" class="CS" element="C" mass="12.0110"/>
  <Type name="scn-N" class="NS" element="N" mass="14.0067"/>
  <Type name="sodium" class="Na" element="Na" mass="22.989800"/>
  <Type name="spce-O" class="OW" element="O" mass="15.99943"/>
  <Type name="spce-H" class="HW" element="H" mass="1.007947"/>

 </AtomTypes>

 <Residues>
  <Residue name="SCN">
   <Atom name="S" type="scn-S"/>
   <Atom name="C" type="scn-C"/>
   <Atom name="N" type="scn-N"/>
   <Bond from="0" to="1"/>
   <Bond from="1" to="2"/>
  </Residue>

  <Residue name="HOH">
   <Atom name="O" type="spce-O"/>
   <Atom name="H1" type="spce-H"/>
   <Atom name="H2" type="spce-H"/>
   <Bond from="0" to="1"/>
   <Bond from="0" to="2"/>
  </Residue>

  <Residue name="Na">
   <Atom name="Na" type="sodium"/>
  </Residue>

 </Residues>
 <HarmonicBondForce>
  <Bond class1="SS" class2="CS" length="0.169500" k="252929.500000"/>
  <Bond cl

## Setting up the OpenMM simulation script
Here, the OpenMM simulation script (`run.py`) is written for each concentration. It requires the PDB file generated by Packmol above and the force field file created in the previous cell.

In [14]:
# Template of the stand-alone OpenMM script (run.py). Placeholders, in order:
#   input PDB path, name of the PDB copy written by the script, force-field XML
#   path, box size (x, y, z), positive ion, DCD report interval, state-data
#   file name, state-data report interval, total steps (for the progress
#   report), number of steps to run.
# The script imports `os` explicitly instead of relying on a star import to
# provide it.
OPENMM_SCRIPT_TEMPLATE = """
from __future__ import division, print_function

import os

import numpy as np

# OpenMM Imports
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
from sys import stdout

print("Current work directory: ", os.getcwd())
pdb = PDBFile("%s")
with open("%s", 'w') as pdb_copy:
    PDBFile.writeFile(pdb.topology, pdb.positions, pdb_copy)

# load the force field
ff = ForceField("%s")

# modify the topology by adding SC and CN bonds
# (atoms come in consecutive S, C, N triplets, one per SCN ion)
atoms = list(pdb.topology.atoms())

for i in np.arange(0,len(atoms)-2,3):
    pdb.topology.addBond(atoms[i],atoms[i+1])
    pdb.topology.addBond(atoms[i+1],atoms[i+2])


modeller = Modeller(pdb.topology, pdb.positions)
# Add  water and cations
modeller.addSolvent(ff,model='spce',boxSize=(%f,%f,%f)*angstroms,positiveIon='%s')

# Create the OpenMM system
print('Creating OpenMM System')

system = ff.createSystem(modeller.topology,nonbondedMethod=PME,ewaldErrorTolerance=0.0005,
                          nonbondedCutoff=1.0*nanometers, constraints=AllBonds, rigidWater=True)

# system = ff.createSystem(modeller.topology,nonbondedMethod=PME,ewaldErrorTolerance=0.0005,
#                           nonbondedCutoff=1.0*nanometers, constraints=HBonds, rigidWater=True)

# Create the integrator to do Langevin dynamics
integrator = LangevinIntegrator(
                        298*kelvin,       # Temperature of heat bath
                        1.0/picoseconds,  # Friction coefficient
                        2.0*femtoseconds, # Time step
)
integrator.setConstraintTolerance(0.00001)

# NPT ensemble
barostat = MonteCarloBarostat(1.0*bar, 298.0*kelvin, 25) 
system.addForce(barostat)
# Define the platform to use; CUDA, OpenCL, CPU, or Reference. Or do not specify
# the platform to use the default (fastest) platform
platform = Platform.getPlatformByName('CPU')

# prop = dict(CudaPrecision='mixed',CudaDeviceIndex='0,1')

# Create the Simulation object
sim = Simulation(modeller.topology, system, integrator, platform) # if prop specified, add ',prop)' to line

# print(platform.getPropertyValue(sim.context))

# Set the particle positions
sim.context.setPositions(modeller.positions)
# Minimize the energy
print('Minimizing energy')
sim.minimizeEnergy(tolerance=1*kilojoule/mole, maxIterations=1000)
LocalEnergyMinimizer.minimize(sim.context,tolerance=1*kilojoule/mole,maxIterations=1000)

sim.context.setVelocitiesToTemperature(298*kelvin)

sim.reporters.append(DCDReporter('out.dcd', %d))

sim.reporters.append(StateDataReporter(open('%s', 'w'), %d, step=True,
      potentialEnergy=True, totalEnergy=True, temperature=True, density=True,
          progress=True, remainingTime=True, speed=True, separator='\t', totalSteps = %d))

print('Running Production...')
sim.step(%d)
with open('out.chk', 'wb') as f:
      f.write(sim.context.createCheckpoint())
print('Saving pdb file')
positions = sim.context.getState(getPositions=True).getPositions()
with open('out.pdb', 'w') as pdb_out:
    PDBFile.writeFile(sim.topology, positions, pdb_out)
print('Done!')"""

# Write one run.py per molal concentration into that concentration's directory.
for conc_m in d['Molal concentrations']:

    wdir = WORKDIR+'data/'+d['Forcefield name']+'/'+d['Water model']+'/'+cation.lower()+anion.lower()+'/'+str(conc_m)+'m/'

    openmm_script = OPENMM_SCRIPT_TEMPLATE % (wdir+str(conc_m)+'m_box.pdb', str(conc_m)+'m_box.pdb', wdir+d['Forcefield name']+'.xml', d['Box size'], d['Box size'], d['Box size'], cation+'+', d['Steps']['Report'], 'out_file', d['Steps']['Report'], d['Steps']['Simulation'], d['Steps']['Simulation'])

    with open(wdir+'run.py', 'w') as file_handle:
        file_handle.write(openmm_script)

## Submission script to the computer cluster Aurora
Here, one has the option to run the simulation on the computer cluster Aurora. Set `aurora` to `True` to write a submission script (`aurora.sh`) for each concentration; if `aurora` is `False`, this cell writes nothing and the simulation is run locally in the next cell.

In [15]:
# Switch between cluster submission and a local run: with aurora = True a batch
# script is written next to every run.py, with aurora = False nothing is written.
aurora = False

if aurora:
    # Batch script for the cluster's scheduler; it runs run.py from the
    # directory the job was submitted from.
    # NOTE(review): the script requests GPUs, while the run.py generated by this
    # notebook selects the 'CPU' platform - confirm this is intended.
    aurora_script="""#!/bin/bash
#SBATCH -p gpu
#SBATCH --exclusive
#SBATCH --gres=gpu:2
#SBATCH --mem-per-cpu=3100
#SBATCH -N 1
#SBATCH -A lu2017-2-5
#
# job time, change for what your job requires
#SBATCH -t 01:00:00
#
# job name
#SBATCH -J scn
#
# filenames stdout and stderr - customise, include %j
#SBATCH -o scn.out
#SBATCH -e scn.err

cd $SLURM_SUBMIT_DIR

#module purge
#module load GCC/5.4.0-2.26
#module load CUDA/8.0.44

module add intelcuda
module unload gcc
module load GCC/4.8.4


python run.py"""

    # Everything up to the salt directory is the same for all concentrations.
    salt_root = WORKDIR+'data/'+d['Forcefield name']+'/'+d['Water model']+'/'+cation.lower()+anion.lower()+'/'

    for conc_m in d['Molal concentrations']:
        wdir = salt_root+str(conc_m)+'m/'
        script_path = wdir+'aurora.sh'
        with open(script_path, 'w') as script_file:
            script_file.write(aurora_script)

## Running simulation
Finally, we run the simulation. If 'aurora' was set to 'True' in the previous cell, the simulation input will be submitted to the computer cluster Aurora. Otherwise, the simulation will be run locally. 

In [16]:
# Run (or submit) the simulation once per molal concentration.
for conc_m in d['Molal concentrations']:
    wdir = WORKDIR+'data/'+d['Forcefield name']+'/'+d['Water model']+'/'+cation.lower()+anion.lower()+'/'+str(conc_m)+'m/'
    # run.py and aurora.sh are invoked by bare file name, and run.py writes its
    # output files (out.dcd, out.chk, out.pdb, ...) relative to the current
    # directory, so switch into the concentration's directory first.
    # NOTE(review): the working directory is not restored afterwards; cells that
    # use relative paths (e.g. '../packmol/') will resolve them against the last
    # wdir if they are re-run after this cell.
    os.chdir(wdir)
    print("Current working directory: ", os.getcwd())
    if aurora:
        # IPython shell escape: submit the batch script with sbatch.
        !sbatch aurora.sh
        
    else:
        # IPython shell escape: run the generated OpenMM script locally.
        !python run.py

Current working directory:  /home/vidar/playground/data/ff_our/spce/nascn/2.0m
Current work directory:  /home/vidar/playground/data/ff_our/spce/nascn/2.0m
Creating OpenMM System
Minimizing energy
Running Production...
Saving pdb file
Done!
