## 1. Definition of computational parameters

In [None]:
import os
import sys
import shutil
import re
import math
import nglview as nv
import pytraj as pt

#insert path to python scripts
sys.path.insert(1, os.environ['BASE'] + "/modules")

from draw_2d import *
from draw_3d import drawit

from molvs import Standardizer

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import SDWriter
from rdkit.Chem import AllChem
from rdkit.Chem import MolFromSmarts
from rdkit.Chem import rdmolops
from rdkit.Chem import rdchem

from convert import *
from execute_orca import *

In [None]:
#define parameters

#input file; the first whitespace-separated token of its first line is read as the SMILES string
ifilename = "molekula.txt"
#wrapper command lines used to launch the GROMACS tools in the shell cells below
gromacs_s = "/opt/podman-run.py gromacs -p" #gmx_mpi
gromacs_d = "/opt/podman-run.py gromacs -p -d" #double precision gromacs
#the plumed driver is launched through the same wrapper as single-precision gromacs
plumed = gromacs_s

#number of conformers to be generated
numc = 50

#specify number of available cpu's to be used in evaluations (maximum possible on machine by default)
#NOTE(review): the value is taken as a string from the CPUS environment variable and a
#missing variable raises KeyError -- confirm that is what the consumers expect
num_of_cpus = os.environ['CPUS']

## 2. Molecule shape processing

In [None]:
#read the input file; the SMILES string is the first whitespace-separated
#token on its first line
with open(ifilename, "r") as ifile:
    molecule = ifile.readlines()
first_line_tokens = molecule[0].split()
smiles_molecule = first_line_tokens[0]

#from here on 'molecule' holds the RDKit molecule built from the SMILES string
molecule = Chem.MolFromSmiles(smiles_molecule)

#draw the 2D structure
svg_picture = moltosvg(molecule)
render_svg(svg_picture)

In [None]:
#store the SMILES string read from the input file
with open("molekula.smi", "w") as smifile:
    smifile.write(smiles_molecule)
    
#standardize the molecule with the MolVS Standardizer and add explicit hydrogens
s = Standardizer()
molecule = s.standardize(molecule)
molecule = Chem.AddHs(molecule)
#atom count taken after the hydrogens were added
natoms = molecule.GetNumAtoms()

render_svg(moltosvg(molecule))

In [None]:
#generate conformations in a pseudo-random way and identify the one with the lowest energy

#set default id of minimal energy conformer to -1
minid = -1
#set default minimal energy to highest possible float number
minene = sys.float_info.max

molecules = AllChem.EmbedMultipleConfs(molecule, clearConfs=True, numConfs=numc)

#run 'MMFF94 force field' energy evaluation
#(the assignment must stay on its own line; 'done' is consumed by the loop below)
done = AllChem.MMFFOptimizeMoleculeConfs(molecule)

#element [1] of every result entry is compared as the conformer energy
for i, result in enumerate(done):
  if result[1] < minene:
    minene = result[1]
    minid = i
print(f'Minimal energy: {minene}')

#write the lowest-energy conformer; close the writer so the file is flushed
#before antechamber reads it
writer = SDWriter("molekula.mol")
try:
  writer.write(molecule, confId=minid)
finally:
  writer.close()

drawit(molecule)

In [None]:
#set pattern to detect torsion angles

#SMARTS query of two atoms joined by a single, non-ring bond ("-&!@");
#both atom expressions carry the same list of excluded environments ("!..." terms)
RotatableBond = Chem.MolFromSmarts('[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]')
#every match is indexed below as [0] and [1]: the two atoms of one matching bond
rotatables = molecule.GetSubstructMatches(RotatableBond)
print(rotatables)

In [None]:
#get numbers of atoms that form torsion angles

def _outer_torsion_atom(mol, inner_idx, other_inner_idx):
  """Return the index of a neighbour of atom inner_idx other than other_inner_idx.

  Bonds in which inner_idx is the begin atom are preferred. A bond in which
  inner_idx is the end atom is used only when no such bond exists, so an
  atom that never appears as a begin atom still yields a neighbour.

  Raises ValueError when the atom has no neighbour besides other_inner_idx.
  """
  fallback = None
  for bond in mol.GetBonds():
    begin = bond.GetBeginAtomIdx()
    end = bond.GetEndAtomIdx()
    if begin == inner_idx and end != other_inner_idx:
      return end
    if fallback is None and end == inner_idx and begin != other_inner_idx:
      fallback = begin
  if fallback is None:
    raise ValueError("atom {} has no neighbour other than atom {}".format(inner_idx, other_inner_idx))
  return fallback

#each torsion is [outer atom, bond atom 0, bond atom 1, outer atom]
torsions = []
for rotatable in rotatables:
  first_outer = _outer_torsion_atom(molecule, rotatable[0], rotatable[1])
  last_outer = _outer_torsion_atom(molecule, rotatable[1], rotatable[0])
  torsions.append([first_outer, rotatable[0], rotatable[1], last_outer])
print(torsions)

## 3. Preparation of environment and molecule

In [None]:
# prepare input files for energetic minimization

#formal charge of the molecule, handed to antechamber through -nc
netcharge = rdmolops.GetFormalCharge(molecule)
#chain of external tools; each step runs only if the previous one succeeded (&&):
#antechamber (molekula.mol -> molekula.prepi), parmchk2 (-> molekula.frcmod),
#tleap driven by tleapin.txt, acpype on molekula.prmtop / molekula.inpcrd
#NOTE(review): tleapin.txt is not written by this notebook -- it has to exist beforehand
!antechamber -i molekula.mol -fi mdl -o molekula.prepi -fo prepi -c bcc -nc {netcharge} && \
parmchk2 -i molekula.prepi -f prepi -o molekula.frcmod && \
tleap -f tleapin.txt && \
acpype -p molekula.prmtop -x molekula.inpcrd

#directory with the minimisation inputs; em.mdp holds the GROMACS run parameters
!mkdir em
with open("em/em.mdp", "w") as emfile:
    emfile.write("integrator          =  steep\n")
    emfile.write("nsteps              =  100000\n")
    emfile.write("emtol               =  0\n")
    emfile.write("emstep              =  0.1\n")
    emfile.write("nstcomm             =  1\n")
    emfile.write("nstxout             =  100\n")
    emfile.write("nstvout             =  100\n")
    emfile.write("nstfout             =  0\n")
    emfile.write("nstlog              =  100\n")
    emfile.write("nstenergy           =  100\n")
    emfile.write("nstlist             =  1\n")
    emfile.write("ns_type             =  grid\n")
    emfile.write("coulombtype         =  cut-off\n")
    emfile.write("rlist               =  1.4\n")
    emfile.write("rcoulomb            =  1.4\n")
    emfile.write("rvdw                =  1.4\n")
    emfile.write("energygrps          =  System\n")
    emfile.write("epsilon-r           =  80\n")
    emfile.write("\n")
#structure and topology used by the minimisation
#presumably MOL_GMX.gro / MOL_GMX.top are the acpype outputs -- verify
shutil.copy("MOL_GMX.gro", "em/")
shutil.copy("MOL_GMX.top", "em/")

In [None]:
#inside /em: centre the molecule (-c) in a 3 x 3 x 3 box, build the run input em1
#from em.mdp, then run it with the double-precision wrapper
!{gromacs_s} -w /em -- editconf -f MOL_GMX -o box -c -box 3 3 3 && \
{gromacs_s} -w /em -- grompp -f em.mdp -c box -p MOL_GMX -o em1 && \
{gromacs_d} -w /em -- mdrun -deffnm em1 -ntomp 2 

In [None]:
#prepare files for getting the molecule to dynamic state

#directory with the dynamics inputs; md.mdp holds the GROMACS run parameters
!mkdir md
with open("md/md.mdp", "w") as mdfile:
    mdfile.write("integrator          = sd\n")
    mdfile.write("nsteps              = 100000\n")
    mdfile.write("dt                  = 0.001\n")
    mdfile.write("nstxout             = 1000\n")
    mdfile.write("nstvout             = 1000\n")
    mdfile.write("nstenergy           = 1000\n")
    mdfile.write("nstlog              = 1000\n")
    mdfile.write("continuation        = no\n")
    mdfile.write("constraints         = none\n")
    mdfile.write("cutoff-scheme       = Verlet\n")
    mdfile.write("ns_type             = grid\n")
    mdfile.write("nstlist             = 1\n")
    mdfile.write("rlist               = 1.4\n")
    mdfile.write("rcoulomb            = 1.4\n")
    mdfile.write("rvdw                = 1.4\n")
    mdfile.write("coulombtype         = cut-off\n")
    mdfile.write("tcoupl              = V-rescale\n")
    mdfile.write("tc-grps             = system\n")
    mdfile.write("tau_t               = 0.1\n")
    mdfile.write("ref_t               = 300\n")
    mdfile.write("pcoupl              = no\n")
    mdfile.write("pbc                 = xyz\n")
    mdfile.write("gen_vel             = yes\n")
    mdfile.write("epsilon-r           = 80\n")
    mdfile.write("\n")
#start the dynamics from the minimised structure em/em1.gro
shutil.copy("em/em1.gro", "md/")
shutil.copy("MOL_GMX.top", "md/")

In [None]:
#inside /md: build the run input md1 from md.mdp and em1, then run it with the
#double-precision wrapper
!{gromacs_s} -w /md -- grompp -f md.mdp -c em1 -p MOL_GMX -o md1 && \
{gromacs_d} -w /md -- mdrun -deffnm md1 -ntomp 2

In [None]:
#show trajectory of conformations
#select group for trjconv evaluation
#Group     0 (         System)
#Group     1 (          Other)
#Group     2 (            MOL)
group = 0

#convert the trajectory md1.trr to outTraj.pdb inside /md; the selected group is
#handed to the wrapper through -g
!{gromacs_s} -g {group} -w /md -- trjconv -s md1.tpr -f md1.trr -o outTraj.pdb

#load the converted trajectory and display it as the cell output
traj = pt.load('md/outTraj.pdb')
view = nv.show_pytraj(traj)
view

In [None]:
#fix periodic boundaries errors when the molecule jumps out of the box
#select group for trjconv evaluation
#Group     0 (         System)
#Group     1 (          Other)
#Group     2 (            MOL)
group = 1

#write one starting structure frame{i}.gro per torsion: trjconv is limited by -b/-e
#to a window of +-0.01 around the value 100-len(torsions)+i
#NOTE(review): the group is passed twice (-g {group} to the wrapper and "0" through
#the here-document) -- confirm which of the two is actually consumed
for i in range(len(torsions)):
  fr = str(float(100-len(torsions)+i)-0.01)
  to = str(float(100-len(torsions)+i)+0.01)
  !{gromacs_s} -g {group} -w /md -- trjconv -pbc nojump -s md1 -f md1 -o frame{i}.gro -b {fr} -e {to}<<EOF\n0\nEOF

## 4. Energetic optimisation

In [None]:
!mkdir mtd
with open("mtd/mtd.mdp", "w") as mtdfile:
    mtdfile.write("integrator          = sd\n")
    mtdfile.write("nsteps              = 1000000\n")
    mtdfile.write("dt                  = 0.001\n")
    mtdfile.write("nstxout             = 1000\n")
    mtdfile.write("nstvout             = 1000\n")
    mtdfile.write("nstenergy           = 1000\n")
    mtdfile.write("nstlog              = 1000\n")
    mtdfile.write("continuation        = no\n")
    mtdfile.write("constraints         = none\n")
    mtdfile.write("cutoff-scheme       = Verlet\n")
    mtdfile.write("ns_type             = grid\n")
    mtdfile.write("nstlist             = 1\n")
    mtdfile.write("rlist               = 1.4\n")
    mtdfile.write("rcoulomb            = 1.4\n")
    mtdfile.write("rvdw                = 1.4\n")
    mtdfile.write("coulombtype         = cut-off\n")
    mtdfile.write("tcoupl              = V-rescale\n")
    mtdfile.write("tc-grps             = system\n")
    mtdfile.write("tau_t               = 0.1\n")
    mtdfile.write("ref_t               = 300\n")
    mtdfile.write("pcoupl              = no\n")
    mtdfile.write("pbc                 = xyz\n")
    mtdfile.write("gen_vel             = yes\n")
    mtdfile.write("epsilon-r           = 80\n")
    mtdfile.write("\n")

In [None]:
for i in range(len(torsions)):    
    !mkdir mtd/w{i}
    with open("mtd/w{}/plumed.dat".format(i), "w") as plumeddat:
        plumeddat.write("RANDOM_EXCHANGES\n")
        plumeddat.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(natoms))
        for j in range(len(torsions)):
            plumeddat.write("TORSION ATOMS={},{},{},{} LABEL=cv{}\n".format(torsions[j][0],torsions[j][1],torsions[j][2],torsions[j][3],j+1))
        plumeddat.write("METAD ARG=cv{} HEIGHT=0.5 SIGMA=0.3 PACE=1000 GRID_MIN=-pi GRID_MAX=pi BIASFACTOR=15 LABEL=be\n".format(i+1))
        cvs = ""
        for j in range(len(torsions)):
            cvs=cvs+"cv{},".format(j+1)
        cvs = cvs[:-1]
        plumeddat.write("PRINT ARG={} STRIDE=1000 FILE=COLVAR\n".format(cvs))
        plumeddat.write("PRINT ARG=be.bias STRIDE=1000 FILE=BIAS\n")

shutil.copy("MOL_GMX.top", "mtd/")

In [None]:
#give every walker its starting frame from md/ and build its run input w{i}/mtd1
for i in range(len(torsions)):
    shutil.copy("md/frame{}.gro".format(i), "mtd/w{}/".format(i))
    !{gromacs_s} -w /mtd -- grompp -f mtd.mdp -c w{i}/frame{i} -p MOL_GMX -o w{i}/mtd1

In [None]:
directories = ""
for i in range(len(torsions)):
    directories = directories + "w{} ".format(i)

!{gromacs_d} -n {len(torsions)} -w /mtd -- mdrun -ntomp 1 -deffnm mtd1 -replex 500 -plumed plumed.dat -multidir {directories}

## 5. Accurate energy evaluation (Quantum mechanics)

In [None]:
#select groups for cluster evaluation
#Group     0 (         System)
#Group     1 (          Other)
#Group     2 (            MOL)
groups = "10"

!{gromacs_s} -g {groups} -- cluster -method gromos -cl clustering/outCluster.pdb -s md/md1.tpr -f md/md1.trr


#divide all clusters from gmx cluster to single clusters
with open("clustering/outCluster.pdb") as input_cluster:
    i = 0
    outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
    for line in input_cluster:
        if line != "ENDMDL\n":
            outFile.write(line)
            continue
        outFile.write("ENDMDL\n")
        i += 1
        outFile.close()
        outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
!rm clustering/outClustersPDB/outCluster{i}.pdb
clusters_count = i


#convert .pdb clusters to .xyz
for pdb_cluster in os.listdir("clustering/outClustersPDB/"):
    !babel -ipdb clustering/outClustersPDB/{pdb_cluster} -oxyz clustering/outClustersXYZ/{pdb_cluster.replace("pdb", "xyz")}

In [None]:
#Orca method description - first line in orca method
method_description = "!AM1 Opt"


#build the inputs in am1/input/ from the .xyz clusters; the torsions, the method
#line and the CPU count are passed on to the project helper convert_to_orca_methods
convert_to_orca_methods("clustering/outClustersXYZ/", "am1/input/", torsions, method_description, num_of_cpus)

In [None]:
#minimisation of conformations before quantum mechanics
#project helper execute_orca; input directory am1/input/, output directory am1/output/
execute_orca("am1/input/", "am1/output/", "am1")

In [None]:
#Orca method description - first line in orca method
method_description = "!BP86 def2-TZVP TightSCF Opt"


#build the bp86 inputs cluster by cluster from the directories am1/output/outCluster{i}/
for i in range(0, clusters_count):
    convert_to_orca_methods("am1/output/outCluster{}/".format(i), "bp86/input/", torsions, method_description, num_of_cpus)

In [None]:
#run quantum mechanics
#project helper execute_orca; input directory bp86/input/, output directory bp86/output/
execute_orca("bp86/input/", "bp86/output/", "bp86")

In [None]:
#extract final energies from output of orca

def _natural_key(name):
    """Return a sort key that orders embedded numbers by value ('x2' before 'x10')."""
    return [int(chunk) if chunk.isdigit() else chunk for chunk in re.split(r'(\d+)', name)]

energy_pattern = re.compile(r'(FINAL SINGLE POINT ENERGY)( +)(-?\d+\.\d+)')

#NOTE(review): the outputs are read from orca_output/bp86 while execute_orca was
#given bp86/output/ -- confirm that the helper really writes its logs here
#"w" (not append): the following cell rewrites this file in kJ/mol, so appending
#on a re-run would mix units and duplicate lines
with open("orca_energies.txt", "w") as ofile:
    #the line number in orca_energies.txt is later used as the cluster index, so
    #the outputs are processed in natural (numeric) file-name order
    for orca_output in sorted(os.listdir("orca_output/bp86"), key=_natural_key):
        with open("orca_output/bp86/" + orca_output) as iorca_output:
            #scan from the end: the last reported energy is the final one
            for line in reversed(list(iorca_output)):
                energy_list = energy_pattern.findall(line)
                if energy_list:
                    ofile.write(energy_list[0][2])
                    ofile.write('\n')
                    break
            else:
                #a missing energy shifts all following lines against the cluster indices
                print("WARNING: no FINAL SINGLE POINT ENERGY found in {}".format(orca_output))

In [None]:
#convert from hartree to kJ/mol and write quantum mechanics energies to file

#constant to convert from hartree to kJ/mol
CONVERSION_CONST = 2625.499638

#one energy in hartree per line
with open("orca_energies.txt", "r") as ifile:
    energies_in_hartree = [float(raw_value) for raw_value in ifile.readlines()]

#energies are stored relative to the lowest one
minimum = min(energies_in_hartree)
energies_in_kJ = [(hartree - minimum) * CONVERSION_CONST for hartree in energies_in_hartree]

#the same file is overwritten with the converted values
with open("orca_energies.txt", "w") as ofile:
    ofile.writelines("{}\n".format(converted) for converted in energies_in_kJ)

## 6. Inaccurate energy evaluation (GAFF force field)

In [None]:
#copy atoms from conformation before QM and combine it with orca optimised conformations

output_dir = "pdb_opt/"
input_dir = "bp86/output/outCluster"

!mkdir {output_dir}

atoms = []
with open("clustering/outClustersPDB/outCluster0.pdb", "r") as ifile:
    for line in ifile.readlines():
        if "ATOM" in line:
            atoms.append(line[:26])
    
for i in range(0, clusters_count):
    hetatms = []
    !babel -ixyz {input_dir}{i}/outCluster{i}.xyz -opdb {output_dir}temp_cluster_{i}.pdb
    with open("{}temp_cluster_{}.pdb".format(output_dir, i), "r") as ifile:
        for line in ifile.readlines():
            if "HETATM" in line:
                hetatms.append(line[27:66])
    with open("pdb_opt/cluster{}.pdb".format(i), "w") as output_cluster:
        for i in range(len(atoms)):
            output_cluster.write(atoms[i])
            output_cluster.write(hetatms[i] + "\n")

!rm {output_dir}/temp_*

In [None]:
input_dir = "pdb_opt/"
output_file = "clusters.pdb"

#concatinate optimised conformations to trajectory
with open(output_file, "w") as ofile:
    for i in range(0, clusters_count):
        ofile.write("MODEL {}\n".format(str(i)))
        with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile:
            ofile.write(ifile.read())
            ofile.write("ENDMDL\n")

with open("plumed.dat", "w") as ifile:
    cvs = []
    ifile.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(str(clusters_count)))
    for i in range(0, len(torsions)):
        cvs.append("cv{}".format(i))
        ifile.write("TORSION ATOMS=")
        ifile.write(",".join(str(x) for x in torsions[i]))
        ifile.write(" LABEL={}\n".format(cvs[i]))
    ifile.write("PRINT ARG=")
    ifile.write(",".join(cvs))
    ifile.write(" STRIDE=1 FILE=DIHEDRALS")

#compute dihedrals
!{plumed} driver --plumed plumed.dat --mf_pdb {output_file}

lines = []
with open("DIHEDRALS", "r") as ifile:
    for line in ifile.readlines():
        if "#" not in line:
            lines.append(line)
with open("DIHEDRALS", "w") as ofile:
    for line in lines:
        ofile.write(line)

In [None]:
#perform minimisations and compute GAFF energy

cvs = [[]] * len(torsions)
with open("DIHEDRALS","r") as ifile:
    dihedrals = ifile.readlines()
    for i in range(len(cvs)):
        t_angles = cvs[i].split()
        for j in range(len(torsions)):
            cvs[j].append(float(t_angles[j])*(180/math.pi))

def generate_restraint(cluster_i):
    with open("MOL_GMX.top", "r") as ifile, open("gaff/cluster_{}/restrained.top".format(str(cluster_i)), "w"):
        for line in ifile.readlines():
            if line == "; Ligand position restraints\n":
                ofile.write("\n")
                ofile.write("[ dihedral_restraints ]\n")
                ofile.write(" ".join(torsions[i]))
                ofile.write("2 %3.1f 0 500\n")
                ofile.write("\n")
            ofile.write(line)
!mkdir gaff

#select groups for energy evaluation
groups = "10"

for i in range(0, clusters_count): 
    !mkdir gaff/cluster_{i}
    shutil.copy("pdb_opt/cluster{}.pdb".format(i), "gaff/cluster_{}".format(i))
    shutil.copy("MOL_GMX.top", "gaff/cluster_{}".format(i))
    shutil.copy("em.mdp", "gaff/cluster_{}".format(i))
    shutil.copy("md.mdp", "gaff/cluster_{}".format(i))
    generate_restraint(i)
    !{gromacs_s} -w /gaff/cluster_{i} -- editconf -f cluster{i}.pdb -box 3 3 3 -bt cubic -c -o box.gro
    !{gromacs_s} -w /gaff/cluster_{i} -- grompp -f em -c box.gro -p restrained.top -o em1
    !{gromacs_d} -w /gaff/cluster_{i} -- mdrun -ntomp 2 -s em1 -c after_em1 -g em1 -e em1 -o em1
    !{gromacs_s} -w /gaff/cluster_{i} -- grompp -f md -c box.gro -p MOL_GMX.top -o rerun
    !{gromacs_d} -w /gaff/cluster_{i} -- mdrun -ntomp 2 -s rerun -rerun em1 -c after_rerun -g rerun -e rerun -o rerun
    !{gromacs_s} -w /gaff/cluster_{i} -g {groups} -- energy -f rerun.edr -o rerun.xvg"

In [None]:
#extract energies and from each energy value subtract minimal energy

#last column of the last line of every rerun.xvg, stored as float so that the
#minimum below is numeric rather than lexicographic
energies_lst = []

for i in range(0, clusters_count):
    with open("gaff/cluster_{}/rerun.xvg".format(i), "r") as ifile:
        last_line = ifile.readlines()[-1]
        energies_lst.append(float(last_line.split()[-1]))

min_energy = min(energies_lst)
with open("gaff_energies.txt", "w") as ofile:
    for energy in energies_lst:
        ofile.write("{} \n".format(energy - min_energy))

## 7. Define correction of force field

In [None]:
#subtract from orca energies computed in step 5, GAFF energies computed in step 6

with open("orca_energies.txt", "r") as orca_energies, open("gaff_energies.txt", "r") as gaff_energies:
    orca_values = [float(line) for line in orca_energies if line.strip()]
    gaff_values = [float(line) for line in gaff_energies if line.strip()]

#line k of both files belongs to cluster k, so the values are paired one to one
if len(orca_values) != len(gaff_values):
    raise ValueError("{} orca energies but {} GAFF energies".format(len(orca_values), len(gaff_values)))

with open("reference", "w") as ofile:
    for orca_energy, gaff_energy in zip(orca_values, gaff_values):
        ofile.write("{} \n".format(orca_energy - gaff_energy))

In [None]:
#write final corrections into output file along with representative conformations

input_dir = "pdb_opt/"

with open("reference.pdb", "w") as ofile, open("reference", "r") as ifile:
    #line k of "reference" is the correction that belongs to cluster k
    final_energies = ifile.readlines()
    for i in range(clusters_count):
        #REMARK line with the correction first, the conformation itself after it
        correction = final_energies[i]
        ofile.write("REMARK X={}".format(correction))
        cluster_path = "{}cluster{}.pdb".format(input_dir, i)
        with open(cluster_path, "r") as ifile1:
            ofile.write(ifile1.read())