## BLonD core tracking example

A more complete example drawn from BLonD. We will profile and optimize the main
tracking loop, which consists of the kick, drift and histogram (beam profile) operations.


In [None]:
# Load the line_profiler IPython extension; it provides the %lprun magic used below
%load_ext line_profiler

In [None]:
import math
import numpy as np
from numba import njit


def kick_numpy(dt, dE, voltage, omega, phi, acc_kick):
    """Apply the kick to the particle energies, in place.

    For every index ``k`` of ``voltage`` the term
    ``voltage[k] * sin(omega[k] * dt + phi[k])`` is added to ``dE``.
    The constant ``acc_kick`` is added once after all terms.
    ``dt`` is only read and nothing is returned.
    """
    n_terms = len(voltage)
    for k in range(n_terms):
        phase = omega[k] * dt + phi[k]
        dE += voltage[k] * np.sin(phase)
    dE += acc_kick


def drift_numpy(dt, dE, T0, length_ratio, beta, energy, alpha_zero,
                alpha_one, alpha_two):
    """Apply the drift to the particle arrival times, in place.

    A per-particle ``beam_delta`` is computed from ``dE``, ``beta`` and
    ``energy``. ``dt`` is then incremented by ``T0 * length_ratio`` times a
    factor built from a cubic polynomial in ``beam_delta`` with coefficients
    ``alpha_zero``, ``alpha_one`` and ``alpha_two``. ``dE`` is only read and
    nothing is returned.
    """
    one_over_beta_sq = 1. / beta**2
    one_over_energy_sq = 1. / energy**2

    energy_term = dE**2 * one_over_energy_sq + 2 * dE/energy
    beam_delta = np.sqrt(1. + one_over_beta_sq * energy_term) - 1.

    # Cubic polynomial in beam_delta, summed in the same left-to-right order.
    poly = (1 + alpha_zero * beam_delta +
            alpha_one * (beam_delta**2) +
            alpha_two * beam_delta**3)

    dt += T0 * length_ratio * (poly * (1 + dE/energy) / (1 + beam_delta) - 1)


def histo_numpy(dt, profile, cut_left, cut_right):
    """Histogram ``dt`` into ``profile``, in place.

    ``dt`` is binned into ``len(profile)`` equal-width bins spanning
    ``[cut_left, cut_right]`` with ``np.histogram``. The resulting counts
    overwrite the contents of ``profile``; nothing is returned.
    """
    counts, _ = np.histogram(dt, bins=len(profile),
                             range=(cut_left, cut_right))
    # Write into the caller's buffer. Assigning to the bare name ``profile``
    # would only rebind the local variable and lose the result.
    profile[:] = counts



In [None]:
# Initialize parameters

# Problem size
n_iter = 10
n_slices = 512
n_particles = 1_000_000

# Particle coordinates are drawn from normal distributions (dt first, then dE);
# the histogram buffer starts out as all zeros.
dt = np.random.normal(1e-9, 1e-10, n_particles)
dE = np.random.normal(0, 1e7, n_particles)
profile = np.zeros(n_slices, dtype=float)

# Histogram range, derived from the extreme values of dt
cut_left = dt.min() * 0.9
cut_right = dt.max() * 1.1

# Kick parameters (one-element arrays, plus the constant acc_kick)
voltage = np.array([6e6])
omega_rf = np.array([2.5e9])
phi_rf = np.array([0.0])
acc_kick = 1e4

# Drift parameters
T0 = 9e-5
length_ratio = 1.0
beta = 0.99
energy = 450e9
alpha_zero = 1e-2
alpha_one = 1e-3
alpha_two = 1e-4

# Main tracking loop: every turn runs the kick, the drift and the histogram step
def loop_numpy(dt, dE, profile, cut_left, cut_right, voltage, omega, phi,
               acc_kick, T0, length_ratio, beta, energy,
               alpha_zero, alpha_one, alpha_two):
    """Run the NumPy kernels for ``n_iter`` turns.

    Each turn calls ``kick_numpy``, then ``drift_numpy``, then multiplies
    ``profile`` by zero in place and calls ``histo_numpy``. The number of
    turns, ``n_iter``, is read from module scope rather than passed in.
    Nothing is returned.
    """
    for _turn in range(n_iter):
        kick_numpy(dt, dE, voltage, omega, phi, acc_kick)
        drift_numpy(
            dt, dE, T0, length_ratio, beta, energy,
            alpha_zero, alpha_one, alpha_two,
        )
        profile *= 0.0
        histo_numpy(dt, profile, cut_left, cut_right)






In [None]:
%%timeit
loop_numpy(dt, dE, profile, cut_left, cut_right,
           voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta,
           energy, alpha_zero, alpha_one, alpha_two)


In [None]:
%lprun -f loop_numpy loop_numpy(dt, dE, profile, cut_left, cut_right, voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta, energy, alpha_zero, alpha_one, alpha_two)


In [None]:

# We use the nopython mode + automatic parallelization:
# njit compiles each kernel in nopython mode and parallel=True asks Numba to
# parallelize it automatically. The *_numpy functions are left untouched; the
# compiled versions are bound to the new *_jit names.
kick_jit = njit(parallel=True)(kick_numpy)
drift_jit = njit(parallel=True)(drift_numpy)
histo_jit = njit(parallel=True)(histo_numpy)


def loop_jit(dt, dE, profile, cut_left, cut_right, voltage, omega, phi,
                   acc_kick, T0, length_ratio, beta, energy,
                   alpha_zero, alpha_one, alpha_two):
    """Run the compiled kernels for ``n_iter`` turns.

    Each turn calls ``kick_jit``, then ``drift_jit``, then multiplies
    ``profile`` by zero in place and calls ``histo_jit``. ``n_iter`` and the
    three ``*_jit`` kernels are looked up in module scope when the function
    runs. Nothing is returned.
    """
    for _turn in range(n_iter):
        kick_jit(dt, dE, voltage, omega, phi, acc_kick)
        drift_jit(
            dt, dE, T0, length_ratio, beta, energy,
            alpha_zero, alpha_one, alpha_two,
        )
        profile *= 0.0
        histo_jit(dt, profile, cut_left, cut_right)


In [None]:
%%timeit
loop_jit(dt, dE, profile, cut_left, cut_right,
           voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta,
           energy, alpha_zero, alpha_one, alpha_two)


In [None]:
%lprun -f loop_jit loop_jit(dt, dE, profile, cut_left, cut_right, voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta, energy, alpha_zero, alpha_one, alpha_two)


Kick and drift were largely improved, but the histogram can be improved further.

Let's give it a try. 

In [None]:

def histo_unrolled(dt, profile, cut_left, cut_right):
    """Accumulate a histogram of ``dt`` into ``profile``, in place.

    The range ``[cut_left, cut_right]`` is split into ``len(profile)``
    equal-width bins and the bin of every sample is incremented by one.
    Samples outside the range are ignored. Samples on the right edge are
    counted in the last bin, matching ``np.histogram``.

    ``profile`` is not cleared here, so counts add to whatever it already
    holds. Nothing is returned.
    """
    n_bins = len(profile)
    inv_bin_width = n_bins / (cut_right - cut_left)
    target_bin = np.floor((dt - cut_left) * inv_bin_width)
    for i in range(len(target_bin)):
        bin_idx = target_bin[i]
        if bin_idx >= 0 and bin_idx < n_bins:
            profile[int(bin_idx)] += 1.0
        elif bin_idx == n_bins and dt[i] <= cut_right:
            # The right edge is inclusive: a sample at cut_right (or rounded
            # up to it) belongs to the last bin instead of being dropped.
            profile[n_bins - 1] += 1.0


# Rebind histo_jit to the compiled unrolled kernel. loop_jit looks this name up
# each time it runs, so it uses the new kernel without being redefined.
histo_jit = njit(parallel=True)(histo_unrolled)



In [None]:
%%timeit
loop_jit(dt, dE, profile, cut_left, cut_right,
         voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta,
         energy, alpha_zero, alpha_one, alpha_two)


In [None]:
%lprun -f loop_jit loop_jit(dt, dE, profile, cut_left, cut_right, voltage, omega_rf, phi_rf, acc_kick, T0, length_ratio, beta, energy, alpha_zero, alpha_one, alpha_two)


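We should stop at some point. The line profiler reports each line's share of the total time, so the percentages will always add up to 100%, no matter how fast the loop becomes!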