### Run the first cell only when in Google Colab

In [None]:
# !pip3 install ipympl
# !pip3 install pygrpy==0.1.3
# !pip3 install MDanalysis
# from google.colab import output
# output.enable_custom_widget_manager()

The code below is the same as in the materials from previous classes.

In [None]:
%matplotlib ipympl
# %matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from scipy.special import erf

from IPython.display import HTML
from IPython.display import YouTubeVideo
from IPython.display import Image

In [None]:
# function computing a stochastic trajectory of a single particle
def random_walk(num_steps, dt, D, start, box_length):
    """Simulate one Brownian trajectory in a periodic cubic box.

    Parameters
    ----------
    num_steps : int
        Number of random steps; also the number of rows of the result.
    dt : float
        Size of the time step.
    D : float
        Diffusion coefficient. Every Cartesian component of every step is
        drawn from a normal distribution with standard deviation
        ``sqrt(2*D*dt)``.
    start : {"center", "random"}
        ``"center"`` starts the particle in the middle of the box,
        ``"random"`` at a uniformly random position inside the box.
    box_length : float
        Side length of the cubic box; must be positive.

    Returns
    -------
    numpy.ndarray, shape (num_steps, 3)
        Positions after each step, wrapped back into the box
        (periodic boundary conditions). The starting position itself is
        not part of the returned array.

    Raises
    ------
    ValueError
        If ``start`` is not one of the two supported keywords or if
        ``box_length`` is not positive.
    """
    if box_length <= 0:
        raise ValueError("box_length must be positive, got {!r}".format(box_length))
    if start == "center":
        start_pos = np.full(3, 0.5*box_length) # particle starts at box center
    elif start == "random":
        start_pos = np.random.random(3)*box_length # particle starts at random position
    else:
        raise ValueError('start must be "center" or "random", got {!r}'.format(start))
    std_dev = np.sqrt(2*D*dt)
    # random steps are drawn from normal distribution (after the start position,
    # so that a given seed consumes random numbers in the same order as before)
    steps = np.random.normal(0.0, std_dev, size=(num_steps, 3))
    walk = start_pos + np.cumsum(steps, axis=0) # trajectory is a cumulative sum of the random steps
    # periodic boundary conditions: a particle leaving the box re-enters at the
    # opposite face; np.mod does this for all coordinates at once
    return np.mod(walk, box_length)

# function updating lines plotted at the animated plot
def update_lines(num, walks, dt, lines, ax):
    """Animation callback: move every marker to its position in frame ``num``.

    Parameters
    ----------
    num : int
        Frame number supplied by the animation.
    walks : sequence of arrays
        One trajectory per particle, each indexed as ``[frame, coordinate]``.
    dt : float
        Time between consecutive frames; the title shows ``num*dt`` in ps.
    lines : sequence
        One 3D line artist per particle, in the same order as ``walks``.
    ax : Axes
        Axes whose title displays the current time.

    Returns
    -------
    The ``lines`` sequence that was passed in.
    """
    # the title depends only on the frame number, so it is set once per frame
    # instead of once per particle
    ax.set_title('Time={} ps'.format(num*dt))
    for line, walk in zip(lines, walks):
        # row num-1 is shown in frame num; for num == 0 the slice is empty,
        # so nothing is drawn in the very first frame
        line.set_data(walk[num-1:num, :2].T)
        line.set_3d_properties(walk[num-1:num, 2])
    return lines

# main function computing stochastic trajectories of a set of particles
def perform_bd(num_steps, dt, D, start, num_particles, box_length):
    """Generate ``num_particles`` independent trajectories with ``random_walk``.

    All arguments except ``num_particles`` are forwarded unchanged to
    ``random_walk``. Returns a list with one trajectory per particle.
    """
    trajectories = []
    for _ in range(num_particles):
        trajectories.append(random_walk(num_steps, dt, D, start, box_length))
    return trajectories

# function visualizing stochastic motion of particles
def vis(walks, dt, box_length):
    """Show the particles' motion as an animated 3D scatter plot.

    Parameters
    ----------
    walks : array-like
        Trajectories indexed as ``[particle, frame, coordinate]``.
    dt : float
        Time between consecutive frames, shown in the title in ps.
    box_length : float
        Upper limit of all three axes (the lower limit is 0).

    The animation is rendered to JavaScript/HTML and displayed as the output
    of the calling cell; nothing is returned.
    """
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    num_frames = np.shape(walks)[1]

    # one (initially empty) marker per particle
    lines = []
    for _ in walks:
        lines.append(ax.plot([], [], [], 'o')[0])

    axis_range = (0, box_length)
    ax.set(xlim3d=axis_range, xlabel='X (nm)')
    ax.set(ylim3d=axis_range, ylabel='Y (nm)')
    ax.set(zlim3d=axis_range, zlabel='Z (nm)')

    ani = animation.FuncAnimation(
        fig,
        update_lines,
        frames=num_frames,
        fargs=(walks, dt, lines, ax),
        interval=100,
    )
    display(HTML(ani.to_jshtml()))

# function counting particles in a given cuboid defined by three pairs of numbers representing three ranges
def count_in_subbox(traj, subbox):
    """Count, for every time step, the particles located inside a cuboid.

    Parameters
    ----------
    traj : array-like
        Trajectories indexed as ``[particle, step, coordinate]``; only the
        first three coordinates are used.
    subbox : sequence of three (min, max) pairs
        Ranges along x, y and z. Both ends of every range are inclusive.

    Returns
    -------
    numpy.ndarray of int, shape (num_steps,)
        Number of particles inside the cuboid at each step.
    """
    positions = np.asarray(traj)[..., :3]
    bounds = np.asarray(subbox)
    lower, upper = bounds[:3, 0], bounds[:3, 1]
    # a particle is inside only if all three coordinates lie within their range
    inside = np.all((positions >= lower) & (positions <= upper), axis=-1)
    # sum over the particle axis -> one count per time step
    return inside.sum(axis=0).astype(int)

# function visualizing cuboid and concentration in cuboid in time
def vis_subbox(subbox_definition, particle_count, num_particles, dt, box_length):
    """Draw a cuboid inside the simulation box next to its particle count.

    Parameters
    ----------
    subbox_definition : sequence of three (min, max) pairs
        Ranges of the cuboid along x, y and z.
    particle_count : sequence
        Number of particles in the cuboid at each time step.
    num_particles : int
        Total number of particles; sets the vertical range of the count plot
        to ``(-10, num_particles+10)``.
    dt : float
        Time step; count number ``k`` (starting from 1) is plotted at ``k*dt``.
    box_length : float
        Upper limit of all three axes of the 3D panel (the lower limit is 0).
    """
    # plt.figure() instead of plt.subplots(): the latter also creates a
    # full-size Axes that was never used and that recent Matplotlib versions
    # leave visible underneath the grid panels
    fig = plt.figure()
    grid = plt.GridSpec(nrows=2, ncols=2, wspace=0.5, hspace=0.7)
    axa = fig.add_subplot(grid[0, 0], projection="3d")
    axb = fig.add_subplot(grid[0, 1])
    # the bottom row only provides blank space below the two panels
    fig.add_subplot(grid[1, 0]).axis('off')
    fig.add_subplot(grid[1, 1]).axis('off')

    # each row selects, per axis, the lower (0) or upper (1) bound -> 8 corners
    corner_bits = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
                            [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]], dtype=int)
    vertices = np.array(subbox_definition)[np.arange(3), corner_bits]
    # pairs of corner indices that differ in exactly one axis -> 12 edges
    edges = np.array([[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6],
                      [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]], dtype=int)

    axa.set_xlim((0, box_length))
    axa.set_ylim((0, box_length))
    axa.set_zlim((0, box_length))
    axa.set_xlabel('X (nm)')
    axa.set_ylabel('Y (nm)')
    axa.set_zlabel('Z (nm)')
    for i, j in edges:
        axa.plot(*vertices[[i, j], :].T, color='r', ls='-')

    axb.set_ylim((-10, num_particles+10))
    axb.set_xlabel('Time (ps)')
    axb.set_ylabel('Particle count')
    axb.plot(dt*np.arange(1, len(particle_count)+1), particle_count)
    
def write_xyz_to_file(traj, filename, dt):
    """Write trajectories to a text file in XYZ format.

    For every time step the file contains a line with the number of
    particles, a comment line with the file name and the time in ps
    (``(step+1)*dt``), and then one line ``X<k> x y z`` per particle, where
    ``k`` counts particles from 1.

    ``traj`` is indexed as ``[particle][step]`` and each entry holds three
    coordinates.
    """
    with open(filename, 'w') as output_file:
        num_timesteps = len(traj[0])
        num_particles = len(traj)
        for n_timestep in range(num_timesteps):
            frame_lines = [
                '{}\n'.format(num_particles),
                '{} time {} ps\n'.format(filename, (n_timestep+1)*dt),
            ]
            for label, particle in enumerate(traj, start=1):
                frame_lines.append('X{} {} {} {}\n'.format(label, *particle[n_timestep]))
            output_file.writelines(frame_lines)

# Mean squared displacement

The purpose of today's class is to learn to characterize Brownian (and non-Brownian) motion quantitatively. The theoretical prediction due to Einstein is that the Mean Squared Displacement ($\mathrm{MSD}(t) = \langle \vert\mathbf{r}(t)-\mathbf{r}(0)\vert^2 \rangle$) of independent Brownian particles in $d$ dimensions grows linearly with time ($\mathrm{MSD}(t)=2dDt$). But be careful: this is strictly true only in the limit of an infinite number of particles. In reality, you can expect some deviations from linearity in the form of noise, with a magnitude depending on how many particles contribute to the average.

In [None]:
# Brownian-dynamics run no. 1: a small set of particles, all released from the box center.
# The variables defined here (traj, ts, D, num_particles, ...) are reused by the cells below.
plt.close()
np.random.seed(43673826) # seed used to initialize pseudorandom number generator
# same seed means same random numbers!
box_length = 20.0 # length of the simulation cubic box side in nanometers
num_particles = 50 # number of diffusing particles
num_steps = 10000 # number of steps in simulation
dt = 1 # size of the timestep in picoseconds
ts = dt*np.arange(num_steps) # time axis used in the plots: 0, dt, 2*dt, ...
D = 0.00004 # diffusion coefficient in nanometer squared per picosecond
traj = perform_bd(num_steps=num_steps, dt = dt, D = D, start="center", num_particles=num_particles, box_length = box_length)
plot_every_N_steps = 100 # we cannot visualize every simulation step, because it quickly becomes too memory-consuming
# so in the animated figure, we show only every N-th time frame
vis(np.array(traj)[:,::plot_every_N_steps,:], dt*plot_every_N_steps, box_length)
# we can write an xyz file with the trajectories of the simulated particles
# write_xyz_to_file(traj, 'task1.xyz', dt)

To analyze the trajectories, I wrote functions computing the MSD and the time-averaged MSD (TAMSD) that implement the equations presented below. I do not provide the code, though; you need to implement it yourself to solve the assignment.

$$\mathrm{MSD} ( t = m \Delta t ) = \frac{1}{N_{\mathrm{part}}} \sum_{i = 
    1}^{N_{\mathrm{part}}} \vert \boldsymbol{r}_i \left[ m \Delta t \right] -  \boldsymbol{r}_i \left[ 0 \right] \vert^2  $$
    
$$\mathrm{TAMSD} ( t = m \Delta t ) = \frac{1}{N_{\mathrm{part}}} \sum_{i = 
    1}^{N_{\mathrm{part}}} \frac{1}{N_{\mathrm{steps}}-m} \sum_{k = 
    0}^{N_{\mathrm{steps}}-m-1} \vert \boldsymbol{r}_i \left[ ( k + m ) \Delta t \right] - \boldsymbol{r}_i \left[ k \Delta t \right] \vert^2$$

In [None]:
# NOTE(security): unpickling can execute arbitrary code stored in the file,
# so run this cell only if 'mystery.rst' comes from a source you trust.
import dill as pickle
with open('mystery.rst', 'rb', buffering = 0) as restart_file:
    # the four functions are read back one after another from the same file,
    # so the order of the load calls matters
    compute_msd = pickle.load(restart_file)
    compute_tamsd = pickle.load(restart_file)
    compute_msd_pbc = pickle.load(restart_file)
    compute_tamsd_pbc = pickle.load(restart_file)

In [None]:
# MSD and TAMSD averaged over all particles of the run above.
# The TAMSD result is plotted against ts[::100] below, so stride=100 presumably
# means "evaluate every 100th lag time" — TODO confirm against compute_tamsd.
msd_all = compute_msd(traj)
tamsd_all = compute_tamsd(traj, stride=100)

In [None]:
# MSD and TAMSD (ensemble and one example particle) compared with the 6Dt line
plt.close()
fig, ax = plt.subplots()
ax.set_title(r'$N_\mathrm{part}$'+'={}'.format(num_particles))
# solid lines: average over all particles
ax.plot(ts, msd_all, '-', label='MSD all particles', color='black')
ax.plot(ts[::100], tamsd_all, '-', label='TAMSD all particles', color='red')
# dashed lines: the same quantities for a single particle (index 6)
ax.plot(ts, compute_msd([traj[6]]), '--', label='MSD example particle', color='black')
ax.plot(ts[::100], compute_tamsd([traj[6]], stride=100), '--', label='TAMSD example particle', color='red')
# dotted line: theoretical prediction in three dimensions
ax.plot(ts, 6*D*ts, ':', label=r'theory ($6Dt$)')
ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')
_ = ax.legend()

Running the same simulation with a larger number of particles in the simulation box results in much better agreement with the theory (see below).

In [None]:
# Brownian-dynamics run no. 2: same parameters as before, but with more particles.
# The trajectories are stored in traj_more, so traj from the previous run stays available.
plt.close()
np.random.seed(78548734) # seed used to initialize pseudorandom number generator
# same seed means same random numbers!
box_length = 20.0 # length of the simulation cubic box side in nanometers
num_particles = 600 # number of diffusing particles
num_steps = 10000 # number of steps in simulation
dt = 1 # size of the timestep in picoseconds
ts = dt*np.arange(num_steps) # time axis used in the plots: 0, dt, 2*dt, ...
D = 0.00004 # diffusion coefficient in nanometer squared per picosecond
traj_more = perform_bd(num_steps=num_steps, dt = dt, D = D, start="center", num_particles=num_particles, box_length = box_length)
plot_every_N_steps = 100 # we cannot visualize every simulation step, because it quickly becomes too memory-consuming
# so in the animated figure, we show only every N-th time frame
vis(np.array(traj_more)[:,::plot_every_N_steps,:], dt*plot_every_N_steps, box_length)
# we can write an xyz file with the trajectories of the simulated particles
# write_xyz_to_file(traj_more, 'task1.xyz', dt)

In [None]:
# same analysis as for the first run, now for the run with more particles (traj_more)
msd_all_more = compute_msd(traj_more)
tamsd_all_more = compute_tamsd(traj_more, stride=100)

In [None]:
# compare the two runs: solid = fewer particles, dashed = more particles
plt.close()
fig, ax = plt.subplots()
# num_particles holds the value set by the most recent simulation cell
ax.set_title(r'$N_\mathrm{part}$'+'={}'.format(num_particles))
ax.plot(ts, msd_all, '-', label='MSD less particles', color='black')
ax.plot(ts, msd_all_more, '--', label='MSD more particles', color='black')
ax.plot(ts[::100], tamsd_all, '-', label='TAMSD less particles', color='red')
ax.plot(ts[::100], tamsd_all_more, '--', label='TAMSD more particles', color='red')
# dotted line: theoretical prediction in three dimensions
ax.plot(ts, 6*D*ts, ':', label=r'theory ($6Dt$)')
ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')
_ = ax.legend()

# Periodic boundary conditions

Below, in contrast to all previous simulations, the particles start from random positions. Hence their dynamics resembles the experiments with pollen.

In [None]:
# embed the YouTube video with this ID as the output of the cell
YouTubeVideo('R5t-oA796to')

In [None]:
# Brownian-dynamics run no. 3: particles start at random positions in the box.
# NOTE: this cell overwrites traj (and num_particles) from the first run.
plt.close()
np.random.seed(4354534) # seed used to initialize pseudorandom number generator
# same seed means same random numbers!
box_length = 20.0 # length of the simulation cubic box side in nanometers
num_particles = 200 # number of diffusing particles
num_steps = 10000 # number of steps in simulation
dt = 1 # size of the timestep in picoseconds
ts = dt*np.arange(num_steps) # time axis used in the plots: 0, dt, 2*dt, ...
D = 0.00004 # diffusion coefficient in nanometer squared per picosecond
traj = perform_bd(num_steps=num_steps, dt = dt, D = D, start="random", num_particles=num_particles, box_length = box_length)
plot_every_N_steps = 100 # we cannot visualize every simulation step, because it quickly becomes too memory-consuming
# so in the animated figure, we show only every N-th time frame
vis(np.array(traj)[:,::plot_every_N_steps,:], dt*plot_every_N_steps, box_length)
# we can write an xyz file with the trajectories of the simulated particles
# write_xyz_to_file(traj, 'task1.xyz', dt)

In [None]:
# MSD and TAMSD computed directly from the wrapped (periodic) coordinates.
# NOTE: this overwrites msd_all and tamsd_all from the earlier analysis.
msd_all = compute_msd(traj)
tamsd_all = compute_tamsd(traj, stride=100)

In [None]:
# MSD and TAMSD computed from the wrapped coordinates, compared with the 6Dt line
plt.close()
fig, ax = plt.subplots()
ax.set_title('N={}'.format(num_particles))
# solid lines: average over all particles (legend label corrected from 'MDS' to 'MSD')
ax.plot(ts, msd_all, '-', label='MSD all particles', color='black')
ax.plot(ts[::100], tamsd_all, '-', label='TAMSD all particles', color='red')
# dashed lines: the same quantities for a single particle (index 6)
ax.plot(ts, compute_msd([traj[6]]), '--', label='MSD example particle', color='black')
ax.plot(ts[::100], compute_tamsd([traj[6]], stride=100), '--', label='TAMSD example particle', color='red')
# dotted line: theoretical prediction in three dimensions
ax.plot(ts, 6*D*ts, ':', label='theory')
ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')
_ = ax.legend()

Because particles starting at random positions have enough time to reach the edges of the box, periodic boundary conditions introduce artifacts in the MSD, which you can see as spooky jumps in the plot. This is because a particle exiting the box on one side re-enters from the opposite side, which is perceived as an extremely large displacement ($\mathrm{MSD} \approx L^2$). To correct for that, unwrapping of the trajectory is necessary. This procedure consists in translating the positions of each particle to where they would be if the boundaries were not present. You can understand the idea behind this procedure by inspecting the code below.

```
for j in range(len(traj)-1):
    for k in range(3):
        if traj[j+1][k]-traj[j][k] > box_length/2:
            traj[j+1:,k] -= box_length 
        if traj[j+1][k]-traj[j][k] < -box_length/2:
            traj[j+1:,k] += box_length 
```

This idea is implemented in the MSD-computing functions used below.

In [None]:
# variants that additionally take the box length; according to the text above
# they unwrap the periodic trajectories before computing the displacements
msd_pbc_all = compute_msd_pbc(traj, box_length)
tamsd_pbc_all = compute_tamsd_pbc(traj, box_length, stride=100)

In [None]:
# MSD and TAMSD from the PBC-aware functions, compared with the 6Dt line
plt.close()
fig, ax = plt.subplots()
ax.set_title('N={}'.format(num_particles))
# legend label corrected from 'MDS' to 'MSD'
ax.plot(ts, msd_pbc_all, '-', label='MSD all particles', color='black')
ax.plot(ts[::100], tamsd_pbc_all, '-', label='TAMSD all particles', color='red')
# dotted line: theoretical prediction in three dimensions
ax.plot(ts, 6*D*ts, ':', label='theory')
ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')
_ = ax.legend()

# Beyond Brownian motion

The classical description of Brownian motion provided by Einstein, Sutherland, Smoluchowski, and Langevin explains the motion of dilute particles suspended in simple, homogeneous fluids. However, there are physical systems driven by random forces, in which the tracked particle’s MSD is not a linear function of time. An example of such behavior is the motion of mRNA in the *Escherichia coli* cytoplasm [Golding, I., & Cox, E. C. (2006). Physical nature of bacterial cytoplasm. Physical Review Letters, 96(9), 098102]. If the MSD follows a generalized power law ($\mathrm{MSD}(t)=2dDt^\alpha$ with $\alpha \neq 1$), we call this phenomenon **anomalous diffusion**.

There are various physical mechanisms responsible for the emergence of anomalous diffusion, among others: active transport of particles by motor proteins ($\alpha>1$), binding to immobile traps ($\alpha<1$), and viscoelastic properties of the medium ($\alpha<1$). The overall behavior of the MSD may be even more complex and impossible to express by a single power law, as in the case of diffusion under macromolecular crowding.

In [None]:
# Generate synthetic 3D trajectories with three different anomalous exponents.
plt.close()
n_steps = 500 # length T of every generated trajectory
n_traj = 200 # number of trajectories per data set
stride = 10 # lag stride used by the TAMSD analysis below
from andi_datasets.datasets_theory import datasets_theory
AD = datasets_theory()
# AD.avail_models_name lists the available models; index 2 is used for all three data sets
# The exponent of every data set now matches its name and the labels of the plots
# below (alpha = 0.7, 1.0, 1.5); previously the values 1.5 and 1.0 were swapped
# between the "diffusion" and "superdiffusion" data sets.
dataset_subdiffusion = AD.create_dataset(T = n_steps, N_models = n_traj, exponents = [0.7], models = [2], dimension = 3)
dataset_diffusion = AD.create_dataset(T = n_steps, N_models = n_traj, exponents = [1.0], models = [2], dimension = 3)
dataset_superdiffusion = AD.create_dataset(T = n_steps, N_models = n_traj, exponents = [1.5], models = [2], dimension = 3)

def get_trajectories(dataset, n_steps):
    """Convert flat data-set rows into an array of 3D trajectories.

    Every row of ``dataset`` is read as two leading entries (skipped here;
    presumably the model label and exponent — TODO confirm), followed by
    ``n_steps`` x values, ``n_steps`` y values and the z values up to the end
    of the row.

    Returns an array indexed as ``[trajectory, step, coordinate]``.
    """
    x_start = 2
    y_start = x_start + n_steps
    z_start = y_start + n_steps
    trajs_synth = [
        np.array([row[x_start:y_start], row[y_start:z_start], row[z_start:]]).T
        for row in dataset
    ]
    return np.array(trajs_synth)

# convert each generated data set into an array indexed as [trajectory, step, coordinate]
trajs_subdiffusion = get_trajectories(dataset_subdiffusion, n_steps)
trajs_diffusion = get_trajectories(dataset_diffusion, n_steps)
trajs_superdiffusion = get_trajectories(dataset_superdiffusion, n_steps)

In [None]:
# animate every 10th frame of the subdiffusive trajectories (hence the frame spacing of 10);
# box_length/2 is added to every coordinate, box_length being the value set in an earlier cell
vis(np.array(trajs_subdiffusion)[:,::10,:]+box_length/2, 10, box_length)

In [None]:
# TAMSD of the three synthetic data sets, using the lag stride defined above
tamsd_subdiffusion = compute_tamsd(trajs_subdiffusion, stride=stride)
tamsd_diffusion = compute_tamsd(trajs_diffusion, stride=stride)
tamsd_superdiffusion = compute_tamsd(trajs_superdiffusion, stride=stride)

# NOTE: this overwrites the ts time axis of the Brownian-dynamics cells above
ts = np.arange(0,n_steps,stride)

In [None]:
# TAMSD of the three synthetic data sets on linear axes
plt.close()
fig, ax = plt.subplots()
ax.plot(ts, tamsd_subdiffusion, color='blue', label=r'subdiffusion, $\alpha=0.7$')
ax.plot(ts, tamsd_diffusion, color='black', label=r'diffusion, $\alpha=1$')
ax.plot(ts, tamsd_superdiffusion, color='red', label=r'superdiffusion, $\alpha=1.5$')

ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')
ax.legend()

from scipy.optimize import curve_fit

def anom(t, D, alpha):
    """Power-law MSD model in three dimensions: MSD(t) = 6*D*t**alpha."""
    return 6*D*t**alpha

# Lag 0 is left out of the fit: the model value there is 0 for every alpha > 0,
# so the point carries no information, but 0**alpha becomes infinite as soon as
# the optimizer tries alpha < 0.
positive_lags = ts > 0
coeff = curve_fit(anom, ts[positive_lags], np.asarray(tamsd_subdiffusion)[positive_lags])[0]
print('Fitting model to data\nD={}nm^2/ps^alpha; alpha={}'.format(*coeff))

In log-log representation, all curves following the single power law reduce to lines with a slope equal to $\alpha$.

$$\mathrm{MSD}(t) = 6Dt^\alpha$$
$$\log(\mathrm{MSD}(t)) = \log(6Dt^\alpha) = \log(6D)+\alpha \log t$$

In [None]:
# same three curves on log-log axes, where a power law shows up as a straight line
plt.close()
fig, ax = plt.subplots()
ax.plot(ts, tamsd_subdiffusion, color='blue', label=r'subdiffusion, $\alpha=0.7$')
ax.plot(ts, tamsd_diffusion, color='black', label=r'diffusion, $\alpha=1$')
ax.plot(ts, tamsd_superdiffusion, color='red', label=r'superdiffusion, $\alpha=1.5$')

ax.set_xlabel('Time (ps)')
ax.set_ylabel('MSD (nm$^2$)')

ax.set_xscale('log')
ax.set_yscale('log')
_ = ax.legend()

# References

1. Berg, H. C. (1993). Random walks in biology. Princeton University Press.

2. Van Kampen, N. G. (1992). Stochastic Processes in Physics and Chemistry. North-Holland Publishing Co.

3. Rehfeldt, F., & Weiss, M. (2023). The random walker's toolbox for analyzing single-particle tracking data. Soft Matter.

4. Skóra, T. Diffusion and reactions under crowding: Theory and simulations. PhD Thesis (2023) https://tskora.github.io/publication/2023-01-01-Thesis

5. Kondrat, S. (2018). Physics and modelling of intracellular diffusion. arXiv preprint arXiv:1810.05496. https://arxiv.org/abs/1810.05496