# Solution of optimization exercises

In [None]:
%matplotlib inline 
# This is for plotting
import math
import cmath
import numpy as np
from matplotlib.pyplot import subplots
from matplotlib.colors import LogNorm

In [None]:
# Thread/core budget: passed further down to NumExpr, Numba and OMP_NUM_THREADS (Cython/OpenMP)
n_cpu = 4

In [None]:
# Some constants valid for all calculations
# Miller index of the reflection (h direction)
H = 0 
# Miller index of the reflection (k direction)
K = 4 
# Number of unit cells per direction
N = 32 
# Defines how many points are needed to describe a single Laue fringe (2 = Nyquist frequency)
oversampling = 3

# Maximum strain at surface
e0 = 0.01 
# Width of the strain profile below the surface
w = 5.

# Generate real and reciprocal space coordinates
# Real space: integer unit-cell indices 0 .. N-1 along each direction
n = np.arange(N)
m = np.arange(N)
# Reciprocal space: half-open window [H-0.5, H+0.5) (resp. K) sampled with a step of 1/(oversampling*N)
h = np.arange(H-0.5, H+0.5, 1./(oversampling*N))
k = np.arange(K-0.5, K+0.5, 1./(oversampling*N))

In [None]:
def validate_ci(result):
    """Return the error of `result` against the stored reference (for exercises).

    The reference array is read from ``reference_ci.npy`` in the current
    working directory. The returned value is the largest absolute
    element-wise deviation divided by the maximum of the reference.
    """
    expected = np.load("reference_ci.npy")
    deviation = np.abs(np.array(expected) - result)
    return deviation.max() / expected.max()

def display(result):
    """Show `result` as an image of the Bragg peak.

    The array is transposed before plotting, drawn with a logarithmic
    colour normalisation and with the origin in the lower-left corner.
    The axis extent comes from the module-level `h` and `k` grids and the
    title from the module-level `N`.
    """
    figure, axes = subplots()
    figure.suptitle("Bragg peak")
    bounds = (h.min(), h.max(), k.min(), k.max())
    axes.imshow(result.T, extent=bounds, norm=LogNorm(), origin='lower')
    axes.set_xlabel('H')
    axes.set_ylabel('K')
    axes.set_title(f"Crystal {N}x{N}")

## Python implementation

### Circular crystal

In [None]:
def Circ_python_1(N, h, k):
    """Diffracted intensity of an unstrained circular crystal, pure-Python loops.

    A unit cell (n, m) belongs to the crystal when
    (n - N/2)**2 + (m - N/2)**2 <= (N/2)**2.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) holding
             |sum over cells of exp(2j*pi*(h*n + k*m))|**2
    """
    half = N/2
    rows = (np.arange(N) - half).reshape(-1, 1)
    cols = (np.arange(N) - half).reshape(1, -1)
    inside = rows*rows + cols*cols <= half**2  # boolean (N, N) mask of the disc

    two_pi_j = 2j*np.pi
    intensity = np.zeros((h.size, k.size))
    for i_h, v_h in enumerate(h):  # reciprocal-space coordinates
        for i_k, v_k in enumerate(k):
            amplitude = 0.0
            for n, mask_row in enumerate(inside):  # unit cells, row by row
                for m, in_crystal in enumerate(mask_row):
                    if not in_crystal:
                        continue
                    amplitude += cmath.exp(two_pi_j*(v_h*n + v_k*m))
            intensity[i_h, i_k] = abs(amplitude)**2
    return intensity

# Compute
%time intensity = Circ_python_1(N, h, k)
print("Error measured:", validate_ci(intensity))

In [None]:
# Alternative using Python `sum`
def Circ_python_1(N, h, k):
    """Diffracted intensity of an unstrained circular crystal.

    The (n, m) indices lying inside the disc of radius N/2 centred on
    (N/2, N/2) are collected once, then the structure-factor sum runs over
    that list for every (h, k) point.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) with the squared modulus of the sum
    """
    # Collect the positions inside the crystal once and for all
    inside_pos = []
    for n in range(N):
        for m in range(N):
            if ((n-N/2)**2 + (m-N/2)**2) <= (N/2)**2:
                inside_pos.append((n, m))

    intensity = np.zeros((h.size, k.size))
    for i_h, v_h in enumerate(h):  # reciprocal-space coordinates
        for i_k, v_k in enumerate(k):
            amplitude = 0
            for n, m in inside_pos:  # positions inside the crystal
                amplitude += cmath.exp(2j*np.pi*(v_h*n + v_k*m))
            intensity[i_h, i_k] = abs(amplitude)**2
    return intensity

# Compute
%time intensity = Circ_python_1(N, h, k)
print("Error measured:", validate_ci(intensity))

### Circular strained crystal

In [None]:
def Circ_python(N, h, k):
    """Diffracted intensity of a strained circular crystal, pure-Python loops.

    Each unit cell (i, j) within distance N/2 of the centre (N/2, N/2) is
    displaced radially by strain * (offset from centre), where
    strain = e0 * (1 + tanh((r - N/2) / w)). `e0` and `w` are read from
    module scope.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) with the squared modulus of the sum
    """
    half = N / 2
    # Strained coordinates of the cells inside the disc, keyed by lattice index
    displaced = {}
    for row in range(N):
        dx = row - half
        for col in range(N):
            dy = col - half
            dist = (dx*dx + dy*dy)**0.5
            if dist > half:
                continue
            eps = e0 * (1 + math.tanh((dist - half)/w))
            displaced[(row, col)] = (row + eps*dx, col + eps*dy)

    intensity = np.zeros((h.size, k.size))
    for i_h, v_h in enumerate(h):  # reciprocal-space coordinates
        for i_k, v_k in enumerate(k):
            amplitude = 0.0
            for cell_n in range(N):  # sum over unit cells
                for cell_m in range(N):
                    coords = displaced.get((cell_n, cell_m))
                    if not coords:  # cell lies outside the crystal
                        continue
                    pos_n, pos_m = coords
                    amplitude += cmath.exp(2j*np.pi*(v_h*pos_n + v_k*pos_m))
            intensity[i_h, i_k] = abs(amplitude)**2
    return intensity

%time intensity = Circ_python(N, h, k)
print("Error measured:", validate_ci(intensity))

In [None]:
# Alternative computing list of strained position
def Circ_python(N, h, k):
    """Diffracted intensity of a strained circular crystal.

    The strained positions of the cells inside the crystal are computed
    once and stored in a list; the structure-factor sum then runs over that
    list for every (h, k) point. `e0` and `w` are read from module scope.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) with the squared modulus of the sum
    """
    half = N/2  # crystal radius; the centre is at (N/2, N/2)
    strained_pos = []
    for n in range(N):
        x = n - half
        for m in range(N):
            y = m - half
            radius = (x**2 + y**2)**0.5
            if radius > half:
                continue
            delta = e0 * (1 + math.tanh((radius - half)/w))
            strained_pos.append((n + delta*x, m + delta*y))

    intensity = np.zeros((h.size, k.size))
    for i_h, v_h in enumerate(h):  # reciprocal-space coordinates
        for i_k, v_k in enumerate(k):
            amplitude = 0
            for n_s, m_s in strained_pos:
                amplitude += cmath.exp(2j*np.pi*(v_h*n_s + v_k*m_s))
            intensity[i_h, i_k] = abs(amplitude)**2
    return intensity

%time intensity = Circ_python(N, h, k)
print("Error measured:", validate_ci(intensity))

## NumPy implementation

In [None]:
def Circ_numpy(N, h, k):
    """Diffracted intensity of a strained circular crystal, vectorised with NumPy.

    All four coordinates are broadcast against each other on a 4D grid
    ordered (h, k, n, m); the two real-space axes are then summed out.
    `e0` and `w` are read from module scope.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) with the squared modulus of the sum
    """
    half = N/2
    h_ax = h.reshape(-1, 1, 1, 1)
    k_ax = k.reshape(1, -1, 1, 1)
    n_ax = np.arange(N).reshape(1, 1, -1, 1)
    m_ax = np.arange(N).reshape(1, 1, 1, -1)

    radius = np.sqrt((n_ax-half)**2 + (m_ax-half)**2)
    strain = e0 * (1.0 + np.tanh((radius - half)/w))
    inside = radius <= half  # mask of the cells belonging to the crystal

    # Strained positions along each direction
    pos_n = n_ax + strain*(n_ax-half)
    pos_m = m_ax + strain*(m_ax-half)

    contributions = inside * np.exp(2j*np.pi*(h_ax*pos_n + k_ax*pos_m))
    amplitude = contributions.sum(axis=(2, 3))
    return np.abs(amplitude)**2

%time intensity = Circ_numpy(N, h, k)
print("Error measured:", validate_ci(intensity))

## NumExpr implementation

In [None]:
import numexpr as ne
ne.set_num_threads(n_cpu)  # Limit the number of threads to be used

In [None]:
def Circ_numexpr(N, h, k):
    """Diffracted intensity of a strained circular crystal, evaluated with NumExpr.

    The coordinates are reshaped onto a 4D grid ordered (h, k, n, m) and the
    element-wise expressions are handed to `ne.evaluate` as strings.

    NOTE: the names inside the expression strings (n, m, N_2, radius, strain,
    j2pi, h, k, e0, w) must match the variable names in this function and at
    module level -- NumExpr is documented to resolve them from the calling
    frame. Do not rename the locals without updating the strings.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: array of shape (h.size, k.size) with the squared modulus of the sum
    """
    N_2 = N/2
    # Put each coordinate on its own axis so the expressions broadcast to 4D
    h = h.reshape(-1, 1, 1, 1)
    k = k.reshape(1, -1, 1, 1)
    n = np.arange(N).reshape(1, 1, -1, 1)
    m = np.arange(N).reshape(1, 1, 1, -1)
    # Distance of every cell to the centre (N/2, N/2)
    radius = ne.evaluate("sqrt((n - N_2)**2 + (m - N_2)**2)")
    # e0 and w are not locals: they come from module scope
    strain = ne.evaluate("e0 * (1 + tanh((radius-N_2) / w))")
    j2pi = np.pi*2j
    # Cells outside the crystal (radius > N_2) contribute 0 to the sum
    tmp = ne.evaluate("where(radius > N_2, 0, exp(j2pi*(h*(n+strain*(n-N_2)) + k*(m+strain*(m-N_2)))))")
    # Sum over the two real-space axes, then take the squared modulus
    result = abs(tmp.sum(axis=(2,3)))**2
    return result

%time intensity = Circ_numexpr(N, h, k)
print("Error measured:", validate_ci(intensity))

## Numba implementation

In [None]:
import numba as nb
nb.set_num_threads(n_cpu)  # Limit the number of cores to be used

In [None]:
# parallel=True is only honoured in nopython mode; njit requests it explicitly
# instead of relying on the default compilation mode of nb.jit.
@nb.njit(parallel=True)
def Circ_numba(N, h, k):
    """Diffracted intensity of a strained circular crystal, compiled with Numba.

    The outer loop over `h` is a `prange`, so rows of the result are computed
    in parallel. `e0` and `w` are read from module scope.

    NOTE(review): Numba is documented to treat global variables as
    compile-time constants -- re-run this cell after changing `e0` or `w`.

    :param N: number of unit cells per direction
    :param h: 1D array of reciprocal-space coordinates along h
    :param k: 1D array of reciprocal-space coordinates along k
    :return: float64 array of shape (h.size, k.size) with the squared modulus of the sum
    """
    result = np.zeros((h.size, k.size), dtype=np.float64)
    N_2 = N/2
    for h_i in nb.prange(h.size):  # loop over the reciprocal space coordinates
        for k_i in range(k.size):
            tmp = 0j
            for n in range(N):  # loop and sum over unit-cells
                for m in range(N):
                    radius = math.sqrt((n - N_2)**2 + (m - N_2)**2)
                    if radius > N_2:
                        # Outside the crystal: contributes nothing.
                        # An explicit zero is used rather than `continue`,
                        # which was reported not to work with Numba here.
                        value = 0j
                    else:
                        strain = e0 * (1 + math.tanh((radius-N_2)/w))
                        p_n = n + strain*(n - N_2)
                        p_m = m + strain*(m - N_2)
                        value = np.exp(2j*cmath.pi*(h[h_i]*p_n + k[k_i]*p_m))
                    tmp += value
            result[h_i, k_i] = abs(tmp)**2
    return result

%time intensity = Circ_numba(N, h, k)
print("Error measured:", validate_ci(intensity))

## Cython

In [None]:
import os
os.environ["OMP_NUM_THREADS"] = str(n_cpu)
# This enables the %cython mode
%load_ext Cython

In [None]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp -a
#%%cython -a
#cython: embedsignature=True, language_level=3, binding=True
# cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False,
## This is for development:
## cython: profile=True, warn.undeclared=True, warn.unused=True, warn.unused_result=False, warn.unused_arg=True

import numpy as np
from cython.parallel import prange
from libc.math cimport sqrt, pi, tanh

# With Cython3: from libc.complex cimport cabs, cexp
# Accessing C code from cython (out of the scope for today)
cdef extern from "complex.h" nogil:
    double cabs(double complex)
    double complex cexp(double complex)

def Circ_cython(int N, 
                double[::1] h, 
                double[::1] k,
                double e0,
                double w):
    """Diffracted intensity of a strained circular crystal (Cython, OpenMP prange).

    The outer loop over `h` runs in a `prange` without the GIL; each iteration
    fills one row of the result.

    :param N: number of unit cells per direction
    :param h: contiguous 1D float64 array of reciprocal-space coordinates along h
    :param k: contiguous 1D float64 array of reciprocal-space coordinates along k
    :param e0: strain prefactor in e0 * (1 + tanh((r - N/2) / w))
    :param w: width used in the tanh strain profile
    :return: numpy array of shape (h.size, k.size) with the squared modulus of the sum
    """
    cdef:
        double[:, ::1] result
        double N_2, p_n, p_m, strain, radius
        double complex tmp, value
        int i_h, i_k, m, n, h_size, k_size

    h_size = h.shape[0]
    k_size = k.shape[0]
    result = np.zeros((h_size, k_size))
    N_2 = N / 2.0
    for i_h in prange(h_size, nogil=True):  # loop over the reciprocal space coordinates
        for i_k in range(k_size):
            tmp = 0.0
            for n in range(N):  # loop and sum over unit-cells
                for m in range(N):
                    radius = sqrt((n - N_2)** 2 + (m - N_2)** 2)
                    if (radius > (N_2)):
                        # Outside the crystal: no contribution
                        value = 0.0
                    else:
                        strain = e0 * (1.0 + tanh((radius-N_2)/w))
                        p_n = n + strain*(n - N_2)
                        p_m = m + strain*(m - N_2)
                        value = cexp(2j*pi*(h[i_h]*p_n + k[i_k]*p_m))
                    tmp +=  value
            # Each (i_h, i_k) element is written exactly once
            result[i_h, i_k] = cabs(tmp)**2
    return np.asarray(result)

In [None]:
%time intensity = Circ_cython(N, h, k, e0, w)
print("Error measured:", validate_ci(intensity))