# Solution of Cython optimization exercises

In [None]:
import numpy as np

In [None]:
# Number of threads requested for the OpenMP-parallel code
# (exported further down as the OMP_NUM_THREADS environment variable)
n_cpu = 4

In [None]:
def validate_ci(result):
    """Return the relative error of *result* (for exercises).

    The reference array is read from ``reference_ci.npy`` in the current
    working directory.  The value returned is the largest absolute
    deviation between *result* and that reference, divided by the maximum
    of the reference.
    """
    expected = np.load("reference_ci.npy")
    deviation = np.abs(np.asarray(expected) - result)
    return deviation.max() / expected.max()

In [None]:
# Constants shared by all the calculations below
H = 0  # Miller index of the reflection (centre of the h range)
K = 4  # Miller index of the reflection (centre of the k range)
N = 32  # Number of unit cells per direction
# Number of points used to describe a single Laue fringe
# (2 = Nyquist frequency)
oversampling = 3

e0 = 0.01  # Maximum strain at the surface
w = 5.0  # Width of the strain profile below the surface

# Real-space coordinates: one integer index per unit cell in each direction
n, m = np.arange(N), np.arange(N)
# Reciprocal-space coordinates: a window of width 1 centred on each Miller
# index, sampled with a step of 1/(oversampling*N)
h, k = (
    np.arange(centre - 0.5, centre + 0.5, 1.0 / (oversampling * N))
    for centre in (H, K)
)

In [None]:
import os
# Tell the OpenMP runtime how many threads to use (n_cpu is set in an
# earlier cell).
# NOTE(review): presumably this has to be set before the OpenMP runtime is
# first initialised in this process -- confirm.
os.environ["OMP_NUM_THREADS"] = str(n_cpu)
# Load the Cython IPython extension, which provides the %%cython cell magic
%load_ext Cython

In [None]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp -a
#%%cython -a
#cython: embedsignature=True, language_level=3, binding=True
#cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False,
## This is for developing:
## cython: profile=True, warn.undeclared=True, warn.unused=True, warn.unused_result=False, warn.unused_arg=True

import numpy as np
from cython.parallel import prange
from libc.math cimport sqrt, pi, tanh

# With Cython3: from libc.complex cimport cabs, cexp
# Accessing C code from cython (out of the scope for today)
# Declarations of the C99 <complex.h> functions used below.  The block is
# marked ``nogil`` so the functions can be called from code that has
# released the GIL.
cdef extern from "complex.h" nogil:
    # Modulus (absolute value) of a complex number
    double cabs(double complex)
    # Complex exponential
    double complex cexp(double complex)

def circ_cython(int N, 
                double[::1] h, 
                double[::1] k,
                double e0,
                double w):
    """Compute the scattered intensity on a (h, k) grid, in parallel.

    For every pair ``(h[i_h], k[i_k])`` the function sums
    ``exp(2j*pi*(h*p_n + k*p_m))`` over the ``N x N`` unit cells whose
    distance to the point ``(N/2, N/2)`` does not exceed ``N/2``.  The
    position ``(p_n, p_m)`` of a cell is its index ``(n, m)`` displaced by
    ``strain * (index - N/2)``, with
    ``strain = e0 * (1 + tanh((radius - N/2) / w))``.  The squared modulus
    of that sum is stored in the output.

    The outermost loop (over ``h``) is distributed over OpenMP threads with
    ``prange`` and runs without the GIL.

    :param N: number of unit cells per direction
    :param h: C-contiguous 1D float64 array of reciprocal-space coordinates
    :param k: C-contiguous 1D float64 array of reciprocal-space coordinates
    :param e0: amplitude of the strain profile
    :param w: width of the strain profile (must be non-zero)
    :return: 2D float64 numpy array of shape ``(h.shape[0], k.shape[0])``
    """
    cdef:
        double[:, ::1] result
        double N_2, p_n, p_m, strain, radius
        double complex tmp, two_j_pi, value
        int i_h, i_k, m, n, h_size, k_size

    # Loop-invariant constant, computed once while the GIL is still held
    two_j_pi = np.pi*2j
    h_size = h.shape[0]
    k_size = k.shape[0]
    result = np.zeros((h_size, k_size))
    N_2 = N / 2.0
    for i_h in prange(h_size, nogil=True):  # loop over the reciprocal space coordinates
        for i_k in range(k_size):
            tmp = 0.0
            for n in range(N):  # loop and sum over unit-cells
                for m in range(N):
                    radius = sqrt((n - N_2)**2 + (m - N_2)**2)
                    if radius > N_2:
                        # Unit cell outside the disc: no contribution
                        value = 0.0
                    else:
                        strain = e0 * (1.0 + tanh((radius - N_2)/w))
                        p_n = n + strain*(n - N_2)
                        p_m = m + strain*(m - N_2)
                        value = cexp(two_j_pi*(h[i_h]*p_n + k[i_k]*p_m))
                    # Deliberately not ``tmp += value``: inside prange, Cython
                    # turns an in-place operator on a variable into a
                    # reduction across threads, whereas tmp is an accumulator
                    # that is reset and read back within each iteration and
                    # must therefore stay thread-private.
                    tmp = tmp + value
            # Each (i_h, i_k) element is written exactly once, by one thread
            result[i_h, i_k] = cabs(tmp)**2
    return np.asarray(result)

In [None]:
# Time a single run of the Cython implementation with the constants and
# coordinates defined in the earlier cells
%time intensity = circ_cython(N, h, k, e0, w)
# Print the error value returned by validate_ci for this result
print("Error measured:", validate_ci(intensity))