In [None]:
from simtk import unit
from simtk import openmm
import numpy as np
from sys import stdout
from openmmtools import integrators
import random
import matplotlib.pyplot as plt

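Define the OpenMM system used by both the Boltzmann generator (BG) and the MD simulation. The system is loaded from a serialized XML file; the commented-out code shows how it could instead be built from scratch from the PDB file and force field.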

In [None]:
# pdb = app.PDBFile('ala2_fromURL.pdb')
# topology = pdb.getTopology()
# positions = pdb.getPositions(asNumpy=True).value_in_unit(unit.nanometer)

# ff = app.ForceField('amber99sbildn.xml',"amber96_obc.xml")
# system = ff.createSystem(
#     topology=topology, 
#     removeCMMotion=True,
#     nonbondedMethod=app.NoCutoff,
#     constraints=app.HBonds, 
#     rigidWater=True
#     )

# Load the serialized OpenMM system that is shared by the Boltzmann generator
# and the MD simulation.
with open('ala2_xml_system.txt') as f:
    xml = f.read()
system = openmm.XmlSerializer.deserialize(xml)
# Look the platform up by name rather than by index: numeric platform indices
# depend on which plugins OpenMM loaded, so index 2 is not guaranteed to be CUDA.
platform = openmm.Platform.getPlatformByName('CUDA')

In [None]:
#Setting up generator
import torch

# Run on GPU index 3 when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:3"
else:
    device = "cpu"
dtype = torch.float32
# Zero-dimensional "context" tensor: calling `.to(ctx)` on another tensor moves
# it to this device and dtype.
ctx = torch.zeros((), dtype=dtype, device=device)

# A reference trajectory is loaded for two reasons: its per-frame shape fixes the
# dimensionality of the BG, and MixedCoordinateTransformation requires data as an argument.
import mdtraj
dataset = mdtraj.load('TSFtraj.dcd', top='ala2_fromURL.pdb')

import numpy as np
# Atom indices passed to MixedCoordinateTransformation as `fixed_atoms`.
rigid_block = np.array((6, 8, 9, 10, 14))

# Z-matrix passed to MixedCoordinateTransformation: one row of four atom indices
# per remaining atom.
# NOTE(review): presumably each row is (atom, bond partner, angle partner,
# torsion partner) -- confirm against the bgflow documentation.
z_matrix = np.array([
    [0, 1, 4, 6], [1, 4, 6, 8], [2, 1, 4, 0], [3, 1, 4, 0],
    [4, 6, 8, 14], [5, 4, 6, 8], [7, 6, 8, 4],
    [11, 10, 8, 6], [12, 10, 8, 11], [13, 10, 8, 11],
    [15, 14, 8, 16], [16, 14, 8, 6],
    [17, 16, 14, 15], [18, 16, 14, 8],
    [19, 18, 16, 14], [20, 18, 16, 19], [21, 18, 16, 19],
])

def dimensions(dataset):
    """Return the number of scalar values in one frame of ``dataset``.

    ``dataset`` must expose an ``xyz`` array; the result is the product of the
    shape of its first entry (``dataset.xyz[0]``).
    """
    frame_shape = dataset.xyz[0].shape
    return np.prod(frame_shape)
# Flattened size of a single frame; handed to OpenMMEnergy below.
dim = dimensions(dataset)

# Target energy setup (a candidate for a helper function).
# `system` is the OpenMM system deserialized from 'ala2_xml_system.txt' in an earlier cell.
from bgflow.distribution.energy.openmm import OpenMMBridge, OpenMMEnergy

temperature = 300.0 * unit.kelvin
collision_rate = 1.0 / unit.picosecond
timestep = 4.0 * unit.femtosecond
integrator = integrators.LangevinIntegrator(
    temperature=temperature,
    collision_rate=collision_rate,
    timestep=timestep,
)
energy_bridge = OpenMMBridge(system, integrator, n_workers=1)
target_energy = OpenMMEnergy(int(dim), energy_bridge)

# Build the `data` argument for MixedCoordinateTransformation (its influence on
# the result is not yet clear). The first half of the trajectory is shuffled into
# the training set; the same permutation, offset by n_train, picks the test set.
n_train = len(dataset) // 2
n_test = len(dataset) - n_train
permutation = np.random.permutation(n_train)
all_data = dataset.xyz.reshape(-1, dimensions(dataset))
training_data = torch.tensor(all_data[:n_train][permutation]).to(ctx)
test_data = torch.tensor(all_data[n_train:][permutation]).to(ctx)

import bgflow as bg

# Channel sizes: the fixed atoms contribute 3N - 6 Cartesian degrees of freedom,
# and every z-matrix row contributes one bond, one angle and one torsion.
dim_cartesian = 3 * len(rigid_block) - 6
dim_bonds = dim_angles = dim_torsions = len(z_matrix)

# Coordinate transform layer between Cartesian and mixed internal coordinates.
coordinate_transform = bg.MixedCoordinateTransformation(
    data=training_data,
    z_matrix=z_matrix,
    fixed_atoms=rigid_block,
    keepdims=dim_cartesian,
    normalize_angles=True,
)
coordinate_transform = coordinate_transform.to(ctx)

# Prior distribution over all internal-coordinate channels.
dim_ics = sum((dim_bonds, dim_angles, dim_torsions, dim_cartesian))
# The mean is passed explicitly so that samples are created on the right device.
mean = torch.zeros(dim_ics).to(ctx)
prior = bg.NormalDistribution(dim_ics, mean=mean)

# Splits a flat latent vector into (bonds, angles, torsions, cartesian) channels.
split_into_ics_flow = bg.SplitFlow(dim_bonds, dim_angles, dim_torsions, dim_cartesian)

#defining RealNVP
class RealNVP(bg.SequentialFlow):
    """A single RealNVP block: split, couple, swap, couple, merge.

    Parameters
    ----------
    dim : int
        Total number of input dimensions; split into ``dim // 2`` and the rest.
    hidden : sequence of int
        Hidden layer widths of every conditioner network.
    """

    def __init__(self, dim, hidden):
        # Both attributes must exist before _create_layers() runs, so they are
        # assigned ahead of the base-class constructor call.
        self.dim = dim
        self.hidden = hidden
        super().__init__(self._create_layers())

    def _create_layers(self):
        """Return the ordered list of flows making up this block."""
        n_first = self.dim // 2
        n_second = self.dim - n_first
        splitter = bg.SplitFlow(n_first, n_second)

        # Conditioner networks are built in the same order as they are applied.
        first_coupling = self._coupling_block(n_first, n_second)
        second_coupling = self._coupling_block(n_second, n_first)

        return [
            splitter,                  # split into two channels
            first_coupling,            # transform
            bg.SwapFlow(),             # exchange the channels
            second_coupling,           # transform again
            bg.InverseFlow(splitter),  # merge the channels back
        ]

    def _dense_net(self, dim1, dim2):
        """Build a ReLU DenseNet mapping ``dim1`` inputs to ``dim2`` outputs."""
        layer_sizes = [dim1] + list(self.hidden) + [dim2]
        return bg.DenseNet(layer_sizes, activation=torch.nn.ReLU())

    def _coupling_block(self, dim1, dim2):
        """Build an affine coupling flow with separate shift and scale networks."""
        shift_net = self._dense_net(dim1, dim2)
        scale_net = self._dense_net(dim1, dim2)
        transformer = bg.AffineTransformer(
            shift_transformation=shift_net,
            scale_transformation=scale_net,
        )
        return bg.CouplingFlow(transformer)

# Normalising flow: a stack of RealNVP blocks, then the split into
# internal-coordinate channels, then the inverse coordinate transform.
n_realnvp_blocks = 5
layers = [RealNVP(dim_ics, hidden=[128, 128, 128]) for _ in range(n_realnvp_blocks)]
layers.append(split_into_ics_flow)
layers.append(bg.InverseFlow(coordinate_transform))

flow = bg.SequentialFlow(layers).to(ctx)

# Load the trained weights into the freshly built flow. map_location remaps the
# stored tensors onto the device in use, so a checkpoint saved on a GPU still
# loads when this notebook has fallen back to the CPU.
flow.load_state_dict(
    torch.load('modelTSFtraj_xmlsystem_20000KLL.pt', map_location=ctx.device)
)

# Boltzmann generator = prior + flow + target energy.
generator = bg.BoltzmannGenerator(
    flow=flow,
    prior=prior,
    target=target_energy)

In [None]:
#generator_new = bg.BoltzmannGenerator()

In [None]:
def getbg_positions():
    """Draw a single sample from the Boltzmann generator.

    Returns
    -------
    bg_positions : numpy.ndarray
        The sample reshaped to (22, 3).
    dlogp : numpy.ndarray
        The second value returned by ``generator.sample(1, with_dlogp=True)``,
        converted to a NumPy array.
    """
    sample, log_det = generator.sample(1, with_dlogp=True)
    coords = sample.detach().cpu().numpy().reshape(22, 3)
    return coords, log_det.detach().cpu().numpy()
#print(bg_positions, dlogp)


In [None]:
def getbias(positions):
    """Return the negated dlogp of the inverse flow evaluated at ``positions``.

    ``positions`` is a unit-bearing array; it is converted to nanometers and
    flattened to rows of 66 values before being passed through the flow.
    """
    flat_nm = positions.value_in_unit(unit.nanometer).reshape(-1, 66)
    flow_input = torch.tensor(flat_nm).to(ctx)
    z, dlogp_inverse_tensor = flow.forward(flow_input, inverse=True)
    return -(dlogp_inverse_tensor.detach().cpu().numpy())

In [None]:
# unit.BOLTZMANN_CONSTANT_kB is in J/K; multiplying by Avogadro's number gives
# the per-mole constant, so kT can be expressed in kJ/mol.
# NOTE(review): `temperature` is the 300 K value from the generator cell, while
# the MD simulation runs at `md_temperature` -- confirm this is the intended beta.
kb = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
kt = (kb * temperature).value_in_unit(unit.kilojoule_per_mole)
beta = 1 / kt

In [None]:
# Run-length settings for the combined MD / BG sampling loop.
cycles = 100        # number of outer MD + BG cycles
MDsteps = 1_000     # MD integration steps per cycle
BGmoves = 1_000     # maximum BG proposals attempted per cycle

In [None]:
##Setting up MD and initialising

pdb = openmm.app.PDBFile('ala2_fromURL.pdb')
topology = pdb.getTopology()
positions = pdb.getPositions(asNumpy=True).value_in_unit(unit.nanometer)

md_temperature = 1000 * unit.kelvin
md_collision_rate = 1.0 / unit.picosecond
md_timestep = 1.0 * unit.femtosecond

integrator = integrators.LangevinIntegrator(temperature=md_temperature,collision_rate=md_collision_rate,timestep=md_timestep)
#integrator.setConstraintTolerance(0.00001)
#integrator = openmm.VerletIntegrator(timestep)
properties_dict = {}
properties_dict["DeviceIndex"] = "1"
simulation = openmm.app.Simulation(topology, system, integrator,platform,platformProperties=properties_dict)
simulation.context.setPositions(positions)
simulation.minimizeEnergy()
# Draw initial velocities at the MD temperature so they match the thermostat of
# this simulation (previously the 300 K generator `temperature` was used here).
simulation.context.setVelocitiesToTemperature(md_temperature)
#simulation.reporters.append(openmm.app.StateDataReporter(stdout, reportInterval=100, step=True, potentialEnergy=True,temperature=True,kineticEnergy=True))
# Equilibration run before the MD/BG cycles start.
simulation.step(100000)


In [None]:
##NO Bias
# Each cycle runs MDsteps of MD, then proposes up to BGmoves configurations from
# the Boltzmann generator and accepts the first one that passes a Metropolis test
# on the total (kinetic + potential) energy.

# One scratch simulation is enough for evaluating every proposal: positions and
# velocities are overwritten before each energy evaluation, so building a new
# Simulation/Context per proposal is unnecessary work.
integrator = integrators.LangevinIntegrator(temperature=md_temperature,collision_rate=md_collision_rate,timestep=md_timestep)
bgsimulation = openmm.app.Simulation(topology,system,integrator,platform,platformProperties=properties_dict)

# For each cycle with an accepted move, the index of the accepted proposal.
accept_counter = []
for x in range(cycles):
    print('cycle',x)
    simulation.step(MDsteps)
    current_state = simulation.context.getState(getEnergy=True,getPositions=True)
    current_positions = current_state.getPositions(asNumpy=True)
    current_total_energy = current_state.getKineticEnergy() + current_state.getPotentialEnergy()
    print('MD_end_energy',current_total_energy)
    for y in range(BGmoves):
        bg_positions, bias_new = getbg_positions()
        bgsimulation.context.setPositions(bg_positions)
        bgsimulation.context.setVelocitiesToTemperature(md_temperature)
        new_state = bgsimulation.context.getState(getEnergy=True)
        new_total_energy = new_state.getKineticEnergy() + new_state.getPotentialEnergy()
        energy_change = (new_total_energy - current_total_energy).value_in_unit(unit.kilojoule_per_mole)
        if not np.isfinite(energy_change):
            # A NaN/inf energy must never be accepted. The previous
            # min(1, exp(...)) evaluated to 1 for NaN, because every comparison
            # with NaN is False and min() then keeps its first argument.
            acceptance_prob = 0.0
        elif energy_change <= 0:
            # Downhill moves are always accepted; skipping exp() also avoids
            # overflow for very negative energy changes.
            acceptance_prob = 1.0
        else:
            acceptance_prob = np.exp(-beta*energy_change)
        if random.random() < acceptance_prob:
            print('accept new conformation')
            print('accepted BG energy',new_total_energy)
            # Transfer the accepted state into the main MD context.
            new_checkpoint = bgsimulation.context.createCheckpoint()
            simulation.context.loadCheckpoint(new_checkpoint)
            accept_counter.append(y)
            break
        else:
            print('rejected BG energy',y,new_total_energy)


In [None]:
# ##WITH BIAS
# accept_counter = []
# for x in range(cycles):
#     print('cycle',x)
#     simulation.step(MDsteps)
#     current_state = simulation.context.getState(getEnergy=True,getPositions=True)
#     current_positions = current_state.getPositions(asNumpy=True)
#     bias_current = getbias(current_positions)
#     current_total_energy = current_state.getKineticEnergy() + current_state.getPotentialEnergy()
#     print('MD_end_energy',current_total_energy)
#     for y in range(BGmoves):  
#         integrator = integrators.LangevinIntegrator(temperature=md_temperature,collision_rate=md_collision_rate,timestep=md_timestep)
#         bgsimulation = openmm.app.Simulation(topology,system,integrator,platform,platformProperties=properties_dict)
#         bg_positions, bias_new = getbg_positions()
#         bgsimulation.context.setPositions(bg_positions)
#         bgsimulation.context.setVelocitiesToTemperature(md_temperature)
#         new_state = bgsimulation.context.getState(getEnergy=True)
#         new_total_energy = new_state.getKineticEnergy() + new_state.getPotentialEnergy()
#         #print('new_tot_energy',new_total_energy)
#         energy_change = (new_total_energy - current_total_energy).value_in_unit(unit.kilojoule_per_mole)
#         acceptance_prob = min(1,(np.exp(-beta*energy_change)*bias_new/bias_current))
#         if random.random() < acceptance_prob:
#             print('accept new conformation')
#             print('accepted BG energy',new_total_energy)
#             new_checkpoint = bgsimulation.context.createCheckpoint()
#             simulation.context.loadCheckpoint(new_checkpoint)
#             accept_counter.append(y)
#             break
#         else:
#             print('rejected BG energy',y,new_total_energy)


In [None]:
# accept_counter holds, for every cycle in which a BG move was accepted, the
# zero-based index of the accepted proposal (i.e. how many proposals were
# rejected first). Cycles without any acceptance contribute no entry.
print(np.average(accept_counter))

In [None]:
# Round-trip check: map the last MD configuration to latent space and back three
# times in a row, then print every intermediate result. flow.forward returns a
# sequence whose first element is the transformed tensor, hence the [0] indexing.
current_positions = current_state.getPositions(asNumpy=True)
torch_positions = torch.tensor(current_positions.value_in_unit(unit.nanometer).reshape(-1,66)).to(ctx)

z = flow.forward(torch_positions,inverse=True)
x_out = flow.forward(z[0])
z_2 = flow.forward(x_out[0],inverse=True)
x_2 = flow.forward(z_2[0])
# The third cycle continues from x_2. It previously restarted from x_out, which
# made z_3/x_3 exact duplicates of z_2/x_2.
z_3 = flow.forward(x_2[0],inverse=True)
x_3 = flow.forward(z_3[0])
print('initial_x',torch_positions)
print('z 1',z)
print('x_out',x_out)
print('z 2',z_2)
print('x_2',x_2)
print('z 3',z_3)
print('x_3',x_3)

In [None]:
# Compare the latent returned alongside a generator sample with the latent
# recovered by pushing that sample back through the inverse flow.
bg_positions, z = generator.sample(1, with_latent=True)
z_from_inverse = flow.forward(bg_positions, inverse=True)

print(bg_positions, z, z_from_inverse, sep='\n')

# Map both latents forward again to compare the resulting configurations.
x_new = flow.forward(z_from_inverse[0])
x_fromlatent = flow.forward(z)

print(x_new, x_fromlatent, sep='\n')

In [None]:
def _as_flat_tensor(xyz):
    """Reshape coordinates to rows of 66 values and move them to the ctx device/dtype."""
    return torch.tensor(xyz.reshape(-1, 66)).to(ctx)


# Fresh samples from the loaded Boltzmann generator.
BG_cartesian = generator.sample(1000)

# Reference trajectories, subsampled with the given stride.
MD300K_traj = mdtraj.load('300K.dcd', top='ala2_fromURL.pdb', stride=100)
MD300K_cartesian = _as_flat_tensor(MD300K_traj.xyz)

MD3000K_traj = mdtraj.load('3000K.dcd', top='ala2_fromURL.pdb', stride=100)
MD3000K_cartesian = _as_flat_tensor(MD3000K_traj.xyz)

BG3000K_traj = mdtraj.load('3000K_samplestraj.dcd', top='ala2_fromURL.pdb', stride=10)
BG3000K_cartesian = _as_flat_tensor(BG3000K_traj.xyz)

# NOTE(review): mdtraj's superpose is believed to modify the trajectory in place
# and return it, so MD300K_traj itself would be superposed after this line;
# MD300K_cartesian above was copied beforehand -- confirm against the mdtraj docs.
MD300K_superposed_traj = MD300K_traj.superpose(MD300K_traj[0])
MD300K_superposed_cartesian = _as_flat_tensor(MD300K_superposed_traj.xyz)

# Only the first 1000 loaded frames of the 1000 K trajectory are used.
MD1000K_traj = mdtraj.load('1000K.dcd', top='ala2_fromURL.pdb', stride=40)
MD1000K_cartesian = _as_flat_tensor(MD1000K_traj.xyz[0:1000])

In [None]:
import matplotlib.pyplot as plt
# Histogram of per-coordinate deviations from the data-set mean, in Cartesian space.
fig, ax = plt.subplots(figsize=(8,4))
means = {}
dist_from_means = {}
cartesian_sets = {
    'BG 2000KLL on TSF traj': BG_cartesian,
    #'MD300K': MD300K_cartesian,
    #'MD1000K' : MD1000K_cartesian,
    #'MD300K superposed' : MD300K_superposed_cartesian,
    'MD non superposed 3000 K': MD3000K_cartesian,
    'BG 2000 KLL trained on superposed 3000 K': BG3000K_cartesian,
}

names_list = list(cartesian_sets)
for name, cartesian_data in cartesian_sets.items():
    # Centre each data set on its own mean over samples.
    means[name] = cartesian_data.mean(dim=0)
    dist_from_means[name] = cartesian_data - means[name]
    ax.hist(
        dist_from_means[name].detach().cpu().numpy().ravel(),
        bins=40,
        label=name,
        # Later data sets are drawn progressively more transparent.
        alpha=1 - names_list.index(name) / 5,
    )

ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
ax.set_xlabel("Distance from mean, Cartesian space")
#ax.set_ylabel(f"Count   [#Samples / {len(cartesian_data[name].flatten())}]")

In [None]:
# Histogram of per-coordinate deviations from the data-set mean, in latent space:
# every Cartesian data set is first mapped through the inverse flow.
fig, ax = plt.subplots(figsize=(8,4))
latents = {}
means = {}
dist_from_means = {}
cartesian_sets = {'BG 2000KLL trained on TSF traj': BG_cartesian,
            #'MD300K': MD300K_cartesian,
            #'MD1000K' : MD1000K_cartesian,
            #'MD300K superposed' : MD300K_superposed_cartesian,
            'MD non superposed 3000 K' : MD3000K_cartesian,
            'BG 2000 KLL trained on superposed 3000 K' : BG3000K_cartesian
            }

names_list = list(cartesian_sets)
for name, cartesian_data in cartesian_sets.items():
    # Results are keyed by the data-set name (as in the Cartesian histogram cell).
    # Keying by the tensor object only hashes by identity, so entries could not
    # be looked up by name afterwards.
    latents[name] = flow.forward(cartesian_data,inverse=True)[0]
    means[name] = torch.mean(latents[name], dim = 0)
    dist_from_means[name] = torch.sub(latents[name], means[name])
    ax.hist(dist_from_means[name].flatten().cpu().detach().numpy(), bins = 40, label=f'{name}', alpha = (1-names_list.index(name)/5))

ax.legend(bbox_to_anchor = (1.04,1), loc='upper left')
ax.set_xlabel("Distance from mean, latent space")
#ax.set_ylabel(f"Count   [#Samples / {len(BG_cart_distfrommean.flatten())}]")

plt.show()