In [None]:
import numpy as np
from numba import jit

import matplotlib.pyplot as plt
from functools import partial

import sys
sys.path.append('../miniMD')
from miniMD import *

## Transition Path Sampling

In this transition path sampling (TPS) exercise we aim to sample all trajectories that connect two stable states A and B in our model potential. For that, we can use a Monte Carlo algorithm following this rough scheme:

1) We pick a point on the previous/initial path at random with uniform probability; this point is referred to as the shooting point
2) Depending on the dynamics, we might want to perturb this point such that the new path differs from the old.
3) Start a simulation forward and backward in time and integrate each either for $N/2$ steps or until a stable state is reached
4) Invert the backward trajectory and concatenate the inverted backward and forward trajectory to obtain the full proposal path
5) Accept or reject the new trajectory to sample the correct path distribution

Having discussed these steps in detail in the lecture, we can look here at the practical aspects when using overdamped Langevin dynamics as in the last tutorials. Step 1) works out unchanged. However, in step 2) we do not need to modify our shooting point, since the stochastic nature of the dynamics leads to a unique new path even when starting from the same initial point. In step 3), one usually negates the velocities to integrate backward in time. Afterwards, in 4), given that microscopic reversibility applies, one can invert the order of the trajectory. With overdamped dynamics we do not consider the velocities of the particles and, therefore, integrating forward and backward in time just means starting two trajectories and then following 4). Last but not least, using the scheme described in this section, the acceptance criterion in 5) boils down to a simple expression.

For fixed-length TPS, where we integrate for $N/2$ steps forward and $N/2$ steps backward, the acceptance criterion reduces to:
$$p_{acc}(X \to X') = h_A(x'_0) h_B(x'_N)$$ 
That means, we can accept any path that starts in A and ends in B. Since we are often not interested in the specific direction of the path, we can also accept according to:
$$p_{acc}(X \to X') = h_A(x'_0) h_B(x'_N) + h_B(x'_0) h_A(x'_N)$$ 
meaning we accept any path with endpoints in opposing states.

For flexible-length TPS, the selection probability of the shooting point has to be included in the generation probability of the new path and consequently also occurs in the acceptance criterion:
$$p_{acc}(X \to X') = \min \left\{1,  h_A(x'_0) h_B(x'_N) \frac{N}{N'} \right\}$$
where $N'$ is the length of the new path and $N$ is the length of the old path.

We start our code with the well-known model potential:

In [None]:

@jit(nopython=True) 
def custom_potential_energy(current_x : np.ndarray) -> float:
    """
    Evaluates the model potential for a single configuration.

    The potential is the sum of a double-well term in the first coordinate
    and a harmonic term coupling the first and the second coordinate,
    scaled by a factor of 10.

    Parameters
    ----------
    current_x : np.ndarray
        Configuration to evaluate. Only the first two entries
        (current_x[0] and current_x[1]) are read.

    Returns
    -------
    U : float
        Potential energy of the configuration

    """
    x = current_x[0]
    y = current_x[1]

    double_well = (x**2 - 1)**2
    coupling = (x - y)**2

    return 10 * (double_well + coupling)


@jit(nopython=True) 
def custom_force_function(current_x : np.ndarray) -> np.ndarray:
    """
    Evaluates the force (negative gradient of the model potential) for a
    single configuration.

    Parameters
    ----------
    current_x : np.ndarray
        Configuration to evaluate. Only the first two entries
        (current_x[0] and current_x[1]) are read.

    Returns
    -------
    force : np.ndarray
        Array with two entries: the force along the first and along the
        second coordinate.

    """
    x = current_x[0]
    y = current_x[1]

    # Both entries are written below, so the buffer needs no initialisation.
    force = np.empty(2)
    force[0] = -20 * (2 * x**3 - x - y)
    force[1] = 20 * (x - y)

    return force


For the definition of states A and B we use a collective variable defined as the diagonal $x+y$:

In [None]:

@jit(nopython=True) 
def custom_cv(current_x : np.ndarray) -> float:
    """
    Maps a configuration onto the collective variable zeta, the sum of its
    first two coordinates.

    Parameters
    ----------
    current_x : np.ndarray
        Current configuration. The coordinates are taken from the last axis,
        so any leading axes are preserved in the result.

    Returns
    -------
    zeta : float
        Collective variable of the provided configuration (one value per
        configuration if current_x has leading axes).
    """
    first_coordinate = current_x[..., 0]
    second_coordinate = current_x[..., 1]

    return first_coordinate + second_coordinate

Below are given some simulation parameters for orientation:

In [None]:
total_trials = 5000 # Total number of TPS trial moves
equilibration_trials = 100 # Number of trial moves performed before trajectories are saved
path_output_frequency = 10 # Save a trajectory every path_output_frequency trial moves

path_length = 500   # Path length for fixed-length TPS, maximum path length for flexible-length TPS
configuration_output_frequency = 1 # How often configurations are saved along a path

beta = 1 # 1 / kT
timestep = 0.001 # Timestep of integration
diffusion_coefficient = 1 # Diffusion coefficient of the overdamped dynamics


# Sanity checks on the sampling settings; each one fails early with a descriptive message.
assert equilibration_trials < total_trials, "Make sure you don't equilibrate longer than you simulate."
assert path_output_frequency < total_trials, "Make sure you don't output less often than you simulate."
assert path_output_frequency > 0, "The output frequency needs to be larger than 0"


We can use the state definition function also used in the TST notebook to check if we are in a certain state.

**1) Implement two stable state functions, one should go from -inf to -2 (state A) and the other from 2 to +inf (state B).**

In [None]:

@jit(nopython=True) 
def custom_bounded_state(current_x : np.ndarray, cv_function : callable, bounds : tuple = (-np.inf, np.inf)  ) -> bool:
    """
    Checks whether the collective variable of current_x lies strictly
    between the lower and the upper bound.

    Parameters
    ----------
    current_x : np.ndarray
        Current configuration. It is passed unchanged to cv_function.

    cv_function : callable
        Function that accepts the current configuration and maps it onto the collective variable.

    bounds : (float, float)
        Lower and upper bound of the stable state. Both bounds are exclusive,
        i.e. a value exactly on a bound is not considered inside the state.

    Returns
    -------
    inState : bool
        True where the collective variable is within the bounds (a bool
        array if cv_function returns an array).
    """
    lower_bound, upper_bound = bounds
    zeta = cv_function(current_x)

    above_lower = zeta > lower_bound
    below_upper = zeta < upper_bound

    return above_lower & below_upper


# TODO (exercise 1): replace each `...` placeholder with the arguments that turn the
# state definition function into an indicator for one state. According to the task
# above, state A covers CV values from -inf to -2 and state B from 2 to +inf.
# As written, `partial(...)` receives Ellipsis, which is not callable, so this cell
# cannot run until the placeholders are filled in.
state_A_indicator = partial(...)
state_B_indicator = partial(...)

Let us visualize the state definitions before starting the sampling:

In [None]:


# Regular grid on which the potential and the state definitions are evaluated.
x_values = np.linspace(-2.6, 2.6, 100)
y_values = np.linspace(-2.6, 2.6, 100)

# With the default "xy" indexing, np.meshgrid returns arrays of shape
# (len(y_values), len(x_values)). The result arrays are allocated with exactly
# that shape so the cell keeps working when the two axes use a different
# number of grid points.
x_grid, y_grid = np.meshgrid(x_values, y_values)
energies = np.zeros(x_grid.shape)
in_state_A = np.zeros(x_grid.shape)
in_state_B = np.zeros(x_grid.shape)

# Evaluate point by point: the potential reads a single configuration at a time.
for grid_index in np.ndindex(x_grid.shape):
    configuration = np.array([x_grid[grid_index], y_grid[grid_index]])

    energies[grid_index] = custom_potential_energy(configuration)
    in_state_A[grid_index] = state_A_indicator(configuration)
    in_state_B[grid_index] = state_B_indicator(configuration)


In [None]:
fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

# Energy landscape first, then both state regions as faint overlays.
energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

ax.set(xlabel="x", ylabel="y")

plt.show()

Next, we want to write a small helper function that generates a trajectory from a given initial point called shooting_point. For this, you can follow the miniMD tutorial and refactor it into a function.

**2) Complete the generate_path function defined below.**

In [None]:
def generate_path(shooting_point : np.ndarray, path_length : int,
                    configuration_output_frequency : int, 
                    beta : float, timestep : float, 
                    diffusion_coefficient : float) -> np.ndarray:
    """
    Returns a trajectory started from the shooting point.

    This is an exercise skeleton: the integration itself is still a
    placeholder (``...``), and ``trajectory`` must be assigned there before
    the function can return it.

    Parameters
    ----------
    shooting_point : np.ndarray
        Initial configuration of the trajectory. It is copied, so the
        caller's array is not modified by this function.

    path_length : int
        Number of integration steps of the trajectory
        
    configuration_output_frequency : int
        Output frequency, i.e. how often configurations are saved on the path

    beta : float
        1 / kT

    timestep : float
        Timestep of integration

    diffusion_coefficient : float
        Diffusion coefficient of the overdamped dynamics

    Returns
    -------
    trajectory : np.ndarray
        Trajectory started from shooting_point as initial configuration
    """
    
    # Work on a copy so the point on the current path stays untouched.
    previous_x = shooting_point.copy()

    # TODO (exercise 2): propagate previous_x for path_length steps and collect the
    # saved configurations in `trajectory`.
    # NOTE(review): the sampling loops use trajectory[-1] as the end point and drop
    # forward_trj[1:] when concatenating, which suggests the first frame should be
    # the shooting point itself — confirm when implementing.
    ...
    
    return trajectory

The next ingredient needed for the path sampling is an initial path for starting the Monte Carlo algorithm. Similar to MCMC in configuration space, we can use a "bad" guess, since it will relax to a more probable path during sampling. Here, we make use of the low dimensionality of the system and just use a line between A and B as an initial path:

In [None]:
# Initial guess: path_length points on the straight line from (-1, -1) to (1, 1),
# stored as one configuration per row.
diagonal_coordinate = np.linspace(-1, 1, path_length)
current_path = np.vstack((diagonal_coordinate, diagonal_coordinate)).T

In [None]:

fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

# Initial path on top of the energy landscape and the state regions.
ax.plot(current_path[:,0], current_path[:,1])

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()


Finally we can code the actual path sampling algorithm.

**3) Implement a fixed length TPS algorithm as sketched out below.**

In [None]:

# Initialize output variables
trajectory_ensemble = [] # List that should contain the trajectory ensemble
accepted_shooting_points = [] # List that should contain all shooting points from which we managed to get a reactive path

# Start from the straight-line initial path; one configuration per row.
current_path = np.array([np.linspace(-1, 1, path_length),np.linspace(-1, 1, path_length)]).T
accepted_trials = 0

# Sampling
for trial in range(total_trials):

    # Select shooting point from old path
    # TODO (exercise 3): pick a configuration of current_path uniformly at random.
    shooting_point = ...

    # Generate forward and backward trajectory
    # The forward segment gets one step more than the backward segment; its first
    # frame is dropped again when the two segments are joined below.
    forward_trj = generate_path(shooting_point, path_length // 2 + 1, configuration_output_frequency, beta, timestep, diffusion_coefficient)
    reverse_trj = generate_path(shooting_point, path_length // 2, configuration_output_frequency, beta, timestep, diffusion_coefficient)

    # Check if endpoints are in opposing stable states
    fw_in_A = state_A_indicator(forward_trj[-1])
    fw_in_B = ...

    rv_in_A = ...
    rv_in_B = ...

    # TODO (exercise 3): combine the four flags so that the path counts as reactive
    # when its two end points lie in opposing states.
    # NOTE(review): an unfilled `...` placeholder is truthy, so every trial would be accepted.
    is_reactive_path = ...

    # If it is reactive, accept move
    if is_reactive_path:

        # Reverse the backward segment in time and append the forward segment
        # without its first frame.
        current_path = np.vstack([reverse_trj[::-1], forward_trj[1:]])

        accepted_shooting_points.append(...)
        
        accepted_trials += 1

    # Add path to ensemble
    # A rejected move leaves current_path unchanged, so the old path is saved again.
    # NOTE(review): `trial` starts at 0 and the comparison is strict, so
    # equilibration_trials + 1 trials are skipped — confirm this is intended.
    if trial > equilibration_trials and trial % path_output_frequency == 0:
        trajectory_ensemble.append(current_path)

In [None]:
# Report how many of the trial moves of the sampling loop above were accepted.
print("Accepted Trials: ", accepted_trials)

We can visualize the resulting path ensemble. Since this is usually a bit more difficult than visualizing configurations we prepared the following plots, but feel free to give it a shot yourself.

Here we can look at the last path in the ensemble in more detail:

In [None]:


fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# Most recent path of the ensemble and the last accepted shooting point.
last_path = trajectory_ensemble[-1]
last_shooting_point = accepted_shooting_points[-1]

ax.plot(last_path[:,0], last_path[:,1], c="C2", lw=1)

# Mark the first and the last configuration of the path.
for endpoint in (last_path[0], last_path[-1]):
    ax.scatter(endpoint[0], endpoint[1], c="r", s=10, zorder=10)

ax.scatter(last_shooting_point[0], last_shooting_point[1], c="b", s=10, zorder=10)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()

# Legend:
#   red dots  - first and last point of the path
#   blue dot  - shooting point
#   green     - last path of the ensemble

And now we can visualize all trajectories:

In [None]:


fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# One thin line per saved path, with both end points marked.
for path in trajectory_ensemble:
    ax.plot(path[:,0], path[:,1], alpha=1, lw=0.2)
    for endpoint in (path[0], path[-1]):
        ax.scatter(endpoint[0], endpoint[1], c="r", s=3, zorder=10)

for point in accepted_shooting_points:
    ax.scatter(point[0], point[1], c="b", s=10, zorder=10)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()


# Legend:
#   red dots  - first and last point of each path
#   blue dots - accepted shooting points
#   lines     - trajectory ensemble

# Flexible Length

To extend the code to flexible length TPS, we have to make two small changes:

1) Implement generate_path_to_state, which is essentially generate_path but we stop the moment we are in a state and return the trajectory up to that point
2) Include the path length ratio in the acceptance criterion

**4) Adjust your generate_path method to stop when a stable state is reached.**

In [None]:
def generate_path_to_state(shooting_point : np.ndarray, path_length : int,
                           state_functions : list,
                            configuration_output_frequency : int, 
                            beta : float, timestep : float, 
                            diffusion_coefficient : float) -> np.ndarray:
    """
    Returns a trajectory started from the shooting point that is meant to end
    as soon as a stable state is reached.

    This is an exercise skeleton: the integration itself is still a
    placeholder (``...``), and ``trajectory`` must be assigned there before
    the function can return it.

    Parameters
    ----------
    shooting_point : np.ndarray
        Initial configuration of the trajectory. It is copied, so the
        caller's array is not modified by this function.

    path_length : int
        MAXIMUM number of integration steps of the trajectory
        
    state_functions : list
        List of state indicator functions to check against if current_x is in a state (for use [state_A_indicator, state_B_indicator])
        
    configuration_output_frequency : int
        Output frequency, i.e. how often configurations are saved on the path

    beta : float
        1 / kT

    timestep : float
        Timestep of integration

    diffusion_coefficient : float
        Diffusion coefficient of the overdamped dynamics

    Returns
    -------
    trajectory : np.ndarray
        Trajectory started from shooting_point as initial configuration
    """

    # Work on a copy so the point on the current path stays untouched.
    previous_x = shooting_point.copy()
    
    # TODO (exercise 4): propagate previous_x for at most path_length steps, stop as
    # soon as one of state_functions reports that the configuration is in a state,
    # and collect the configurations up to that point in `trajectory`.
    ...

    return trajectory

We can again start with our initial path as a line from A to B:

In [None]:
# Initial guess: path_length points on the straight line from (-1, -1) to (1, 1),
# stored as one configuration per row.
diagonal_coordinate = np.linspace(-1, 1, path_length)
current_path = np.vstack((diagonal_coordinate, diagonal_coordinate)).T

In [None]:

fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

# Initial path on top of the energy landscape and the state regions.
ax.plot(current_path[:,0], current_path[:,1])

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()


Now to the sampling code: since the trajectory generation is outsourced to the generate_path_to_state method, we only need to make a small adjustment to the acceptance criterion, namely to include the ratio of path lengths as described in the introduction at the top of the notebook.

**5) Adjust your sampling code to support flexible length transition paths.**

In [None]:

# Initialize output variables
trajectory_ensemble = [] # List that should contain the trajectory ensemble
accepted_shooting_points = [] # List that should contain all shooting points from which we managed to get a reactive path

# Start from the straight-line initial path; one configuration per row.
current_path = np.array([np.linspace(-1, 1, path_length),np.linspace(-1, 1, path_length)]).T
accepted_trials = 0

# Sampling
for trial in range(total_trials):

    # Select shooting point from old path
    # TODO (exercise 5): pick a configuration of current_path uniformly at random.
    shooting_point = ...

    # Generate forward and backward trajectory
    # Here path_length // 2 (+ 1) is only the MAXIMUM number of steps per segment;
    # each segment is meant to end as soon as one of the two states is reached.
    forward_trj = generate_path_to_state(shooting_point, path_length // 2 + 1, [state_A_indicator, state_B_indicator], configuration_output_frequency, beta, timestep, diffusion_coefficient)
    reverse_trj = generate_path_to_state(shooting_point, path_length // 2, [state_A_indicator, state_B_indicator], configuration_output_frequency, beta, timestep, diffusion_coefficient)

    # Check if endpoints are in opposing stable states
    fw_in_A = state_A_indicator(forward_trj[-1])
    fw_in_B = ...

    rv_in_A = ...
    rv_in_B = ...

    # TODO (exercise 5): combine the four flags so that the path counts as reactive
    # when its two end points lie in opposing states.
    is_reactive_path = ...

    # If it is reactive, check length ratio
    if is_reactive_path:

        # calculate length ratio
        # TODO (exercise 5): N / N' from the introduction, with N the length of the
        # old path and N' the length of the new path.
        length_ratio = ...
        
        # Accept or reject
        # TODO (exercise 5): accept with probability min(1, length_ratio).
        if ... :

            # Reverse the backward segment in time and append the forward segment
            # without its first frame.
            current_path = np.vstack([reverse_trj[::-1], forward_trj[1:]])

            accepted_shooting_points.append(...)
            
            accepted_trials += 1

    # Add path to ensemble
    # A rejected move leaves current_path unchanged, so the old path is saved again.
    # NOTE(review): `trial` starts at 0 and the comparison is strict, so
    # equilibration_trials + 1 trials are skipped — confirm this is intended.
    if trial > equilibration_trials and trial % path_output_frequency == 0:
        trajectory_ensemble.append(current_path)

**6) Describe the difference between the flexible and fixed length path ensembles given the visualization below.**

In [None]:


fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# Most recent path of the ensemble with its first and last configuration marked.
last_path = trajectory_ensemble[-1]
ax.plot(last_path[:,0], last_path[:,1], c="C2", lw=1)

for endpoint in (last_path[0], last_path[-1]):
    ax.scatter(endpoint[0], endpoint[1], c="r", s=10, zorder=10)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()


In [None]:


fig, ax = plt.subplots(1, figsize=(4,4), dpi=180)

energy_levels = np.linspace(0, 20, 10)
ax.contourf(x_grid, y_grid, energies, levels=energy_levels, cmap="RdBu_r")
for state_mask, state_cmap in ((in_state_A, "Blues"), (in_state_B, "Oranges")):
    ax.contourf(x_grid, y_grid, state_mask, cmap=state_cmap, alpha=0.1)

# One thin line per saved path, with both end points marked.
for path in trajectory_ensemble:
    ax.plot(path[:,0], path[:,1], alpha=1, lw=0.2)
    for endpoint in (path[0], path[-1]):
        ax.scatter(endpoint[0], endpoint[1], c="r", s=3, zorder=10)

# State labels are placed in axes coordinates (0..1), not data coordinates.
for label_x, label_y, label, color in ((0.025, 0.1, "State A", "C0"),
                                       (0.825, 0.9, "State B", "C1")):
    ax.text(label_x, label_y, label, transform=ax.transAxes, c=color)

ax.set(xlabel="x", ylabel="y")

plt.show()


In the last part of this notebook, we can have a look at the free energy as a function of the collective variables.
We can compare the free energy of configurations on transition paths with a reference free energy of an equilibrium simulation. 

**7) Estimate the free energy along the custom_cv for configurations on transition paths.**
TIP: you should first pool all configurations in a big numpy array and then use np.histogram.

In [None]:
# 50 bin edges between -3 and 3, i.e. 49 bins along the collective variable.
bins = np.linspace(-3, 3, 50)
bin_centers = (bins[:-1] + bins[1:]) / 2

# TODO (exercise 7): pool the configurations of all saved paths into one array and
# map them onto the collective variable.
cv_values = ...
# density=True normalises the histogram so that it integrates to one over the bins.
density, bin_edges = np.histogram(cv_values, bins=bins, density=True)

# TODO (exercise 7): convert the density into a free energy.
# NOTE(review): presumably F = -ln(density) / beta; empty bins would then give an
# infinite free energy — confirm against the lecture notes.
free_energy = ...

**8) Compare the free energy based on configurations on transition paths with the equilibrium simulation free energy and explain the cause for the difference.**

In [None]:
fig, ax = plt.subplots(1, figsize=(4,3), dpi=180)

# Reference free energy of an equilibrium simulation, shifted so that its minimum is zero.
ref_x, ref_F = np.load("bin_centers_x+y.npy"), np.load("free_energy_x+y.npy")
ref_F -= np.min(ref_F)

# The TPS estimate is shifted the same way so both curves share a common zero.
ax.plot(ref_x, ref_F, label="Reference")
ax.plot(bin_centers, free_energy - np.min(free_energy), label="TPS")

ax.legend(frameon=False)
# Both curves are functions of the collective variable x + y (see custom_cv),
# not of the x coordinate alone, so the axes are labelled accordingly.
ax.set_xlabel("x + y")
ax.set_ylabel("F(x + y)")
plt.show()