# Hamiltonian Speedup With Cython?

In [1]:
%load_ext cython
import Cython
print(Cython.__version__)

0.28.5


In [2]:
import numpy as np
from scipy.sparse import csr_matrix

In [3]:
def so_save(n, Lz, Lproduct):
    """Write the operators for an ``n``-spin system to ``.npy`` files.

    Two files are created (or overwritten) in the current working directory:
    ``Lz{n}.npy`` holding ``Lz`` and ``Lproduct{n}.npy`` holding ``Lproduct``.

    Parameters
    ----------
    n : int
        Number of spins; only used to build the file names.
    Lz, Lproduct : array-like
        Objects handed unchanged to ``np.save``.
    """
    targets = (
        (f'Lz{n}.npy', Lz),
        (f'Lproduct{n}.npy', Lproduct),
    )
    for path, payload in targets:
        with open(path, 'wb') as handle:
            np.save(handle, payload)


def so_load(n):
    """Load the cached operators for an ``n``-spin system.

    Reads ``Lz{n}.npy`` and ``Lproduct{n}.npy`` from the current working
    directory.

    Parameters
    ----------
    n : int
        Number of spins; only used to build the file names.

    Returns
    -------
    (Lz, Lproduct) : tuple
        The two arrays exactly as ``np.load`` returns them.

    Raises
    ------
    FileNotFoundError, OSError
        Propagated from ``np.load`` when a cache file is missing or unreadable.
    """
    filename_Lz = f'Lz{n}.npy'
    filename_Lproduct = f'Lproduct{n}.npy'
    # The cached arrays have dtype=object (their elements are sparse matrices),
    # which np.save stores via pickle. np.load refuses such files unless
    # allow_pickle=True is passed explicitly (the default became False in
    # NumPy 1.16.3), so without it every cache read raises ValueError.
    # SECURITY: unpickling can execute arbitrary code -- only load cache files
    # that this notebook wrote itself.
    Lz = np.load(filename_Lz, allow_pickle=True)
    Lproduct = np.load(filename_Lproduct, allow_pickle=True)
    return Lz, Lproduct

If `sparse_operators` does not find a saved, pre-calculated result on disk, building the operators from scratch is clearly the bottleneck. When a saved result is found, it is not yet clear whether `sparse_operators` is still a bottleneck.


In [4]:
def sparse_operators(nspins):
    """Return the sparse spin operators for a system of ``nspins`` spin-1/2 nuclei.

    A cached copy is returned when ``so_load`` can read one. Otherwise the
    operators are built from Kronecker products, written to disk with
    ``so_save`` and returned.

    Parameters
    ----------
    nspins : int
        Number of spins. Each operator is a 2**nspins x 2**nspins matrix.

    Returns
    -------
    Lz : sequence of scipy.sparse.csr_matrix
        ``Lz[n]`` is the z-component operator of spin ``n``. This is a list
        when freshly built, and whatever ``so_load`` returns when read from
        the cache.
    Lproduct : numpy.ndarray, dtype=object, shape (nspins, nspins)
        ``Lproduct[i, j]`` is the csr_matrix of
        ``Lx[i]*Lx[j] + Ly[i]*Ly[j] + Lz[i]*Lz[j]`` (matrix products).
    """
    try:
        Lz, Lproduct = so_load(nspins)
        return Lz, Lproduct
    except FileNotFoundError:
        print('File not found')
    except OSError:
        print('OSError encountered')
    except KeyError:
        print('KeyError: could not find record')
    except ValueError as err:
        # np.load raises ValueError when it refuses to unpickle an object
        # array or cannot parse the file. Rebuild (and rewrite the cache)
        # instead of crashing.
        print(f'ValueError: could not read cached operators ({err})')

    print(f'Creating h{nspins}')
    # Single-spin operators: the Pauli matrices scaled by 1/2.
    # np.matrix is deliberate: np.dot on the object arrays below combines
    # elements with `*`, which is only a matrix product for np.matrix
    # (for plain ndarrays it would be element-wise).
    sigma_x = np.matrix([[0, 1 / 2], [1 / 2, 0]])
    sigma_y = np.matrix([[0, -1j / 2], [1j / 2, 0]])
    sigma_z = np.matrix([[1 / 2, 0], [0, -1 / 2]])
    unit = np.matrix([[1, 0], [0, 1]])

    # Object arrays holding one full-size Cartesian operator per spin.
    Lx = np.empty((nspins), dtype='object')
    Ly = np.empty((nspins), dtype='object')
    Lz = np.empty((nspins), dtype='object')

    for n in range(nspins):
        # Start from the scalar 1 and Kronecker-multiply one 2x2 factor per
        # position: the spin operator at position n, the identity elsewhere.
        Lx[n] = 1
        Ly[n] = 1
        Lz[n] = 1
        for k in range(nspins):
            if k == n:
                Lx[n] = np.kron(Lx[n], sigma_x)
                Ly[n] = np.kron(Ly[n], sigma_y)
                Lz[n] = np.kron(Lz[n], sigma_z)
            else:
                Lx[n] = np.kron(Lx[n], unit)
                Ly[n] = np.kron(Ly[n], unit)
                Lz[n] = np.kron(Lz[n], unit)

    # Rows are the x, y and z components; columns are the spins.
    # NOTE(review): `.real` is kept from the original; on an object-dtype
    # array it does not appear to strip the imaginary part of the stored
    # matrices -- confirm before relying on Lproduct being real-typed.
    Lcol = np.vstack((Lx, Ly, Lz)).real
    Lrow = Lcol.T
    # (nspins, 3) . (3, nspins): each entry sums the x, y and z products.
    Lproduct = np.dot(Lrow, Lcol)
    Lz_sparse = [csr_matrix(z) for z in Lz]

    # Convert every dense product to CSR in place.
    for i in range(nspins):
        for j in range(nspins):
            Lproduct[i, j] = csr_matrix(Lproduct[i, j])

    so_save(nspins, Lz_sparse, Lproduct)

    return Lz_sparse, Lproduct


In [5]:
from simulation_data import spin8

In [6]:
# spin8 comes from the project-local simulation_data module.
# NOTE(review): v and j do not appear to be used by the later cells shown in
# this section -- confirm whether this call is still needed.
v, j = spin8()

Once the 8-spin files have been saved, `sparse_operators` takes only about 2.8 ms to run.

In [7]:
# Time sparse_operators(8); when the 8-spin cache files exist this measures the
# load-from-disk path rather than the construction of the operators.
timeit_result = %timeit -o sparse_operators(8)
# NOTE: np_time is assigned again after test_so(8) is timed further down;
# print_report() reads whichever assignment ran last.
np_time = timeit_result.average

2.72 ms ± 72.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


`test_so` is a modified version of `sparse_operators` that never uses pre-saved solutions. It will be used to test how much Cython can speed up the function.

In [8]:
def test_so(nspins):
    """"""  
    # Define Pauli matrices
    sigma_x = np.matrix([[0, 1 / 2], [1 / 2, 0]])
    sigma_y = np.matrix([[0, -1j / 2], [1j / 2, 0]])
    sigma_z = np.matrix([[1 / 2, 0], [0, -1 / 2]])
    unit = np.matrix([[1, 0], [0, 1]])

    # The following empty arrays will be used to store the
    # Cartesian spin operators.
    Lx = np.empty((nspins), dtype='object')
    Ly = np.empty((nspins), dtype='object')
    Lz = np.empty((nspins), dtype='object')

    for n in range(nspins):
        Lx[n] = 1
        Ly[n] = 1
        Lz[n] = 1
        for k in range(nspins):
            if k == n:  # Diagonal element
                Lx[n] = np.kron(Lx[n], sigma_x)
                Ly[n] = np.kron(Ly[n], sigma_y)
                Lz[n] = np.kron(Lz[n], sigma_z)
            else:  # Off-diagonal element
                Lx[n] = np.kron(Lx[n], unit)
                Ly[n] = np.kron(Ly[n], unit)
                Lz[n] = np.kron(Lz[n], unit)

    Lcol = np.vstack((Lx, Ly, Lz)).real
    Lrow = Lcol.T  # As opposed to sparse version of code, this works!
 
    Lproduct = np.dot(Lrow, Lcol)
    Lz_sparse = [csr_matrix(z) for z in Lz]

    for i in range(nspins):
        for j in range(nspins):
            Lproduct[i, j] = csr_matrix(Lproduct[i, j])

    return Lz_sparse, Lproduct

In [9]:
lz_old, lp_old = sparse_operators(3)
lz_new, lp_new = test_so(3)
print(lz_old[0])
print(lz_new[0])

# `==` on containers of sparse matrices does not give a usable truth value, so
# compare operator by operator. `a != b` returns a sparse boolean matrix whose
# nnz counts the entries that differ.
assert len(lz_old) == len(lz_new)
assert all((old != new).nnz == 0 for old, new in zip(lz_old, lz_new))
assert lp_old.shape == lp_new.shape
assert all((old != new).nnz == 0 for old, new in zip(lp_old.flat, lp_new.flat))

  (0, 0)	0.5
  (1, 1)	0.5
  (2, 2)	0.5
  (3, 3)	0.5
  (4, 4)	-0.5
  (5, 5)	-0.5
  (6, 6)	-0.5
  (7, 7)	-0.5
  (0, 0)	0.5
  (1, 1)	0.5
  (2, 2)	0.5
  (3, 3)	0.5
  (4, 4)	-0.5
  (5, 5)	-0.5
  (6, 6)	-0.5
  (7, 7)	-0.5


The `%timeit` result for the pure-Python `test_so` with 8 spins is ca. 0.4 s.

In [10]:
# Baseline: build the 8-spin operators from scratch with the pure-Python/NumPy version.
timeit_result = %timeit -o test_so(8)
# print_report() compares later timings against this notebook-level value.
np_time = timeit_result.average

414 ms ± 6.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
def compare_time(current, reference, name):
    """Print how many times faster or slower ``current`` is than ``reference``.

    Parameters
    ----------
    current : float
        Run time of the version under test.
    reference : float
        Run time of the version it is compared against.
    name : str
        Label of the reference version, inserted into the printed sentence.
    """
    speedup = reference / current
    word = "faster" if speedup > 1 else "slower"
    # Always report a factor >= 1; invert it when the current version is not faster.
    factor = speedup if word == "faster" else 1 / speedup
    message = ("We are", "{0:.1f}".format(factor), "times", word, "than the", name, "version.")
    print(*message)

def print_report(compute_function):
    """Time ``compute_function(8)`` with ``%timeit`` and print how it compares to ``np_time``.

    Parameters
    ----------
    compute_function : callable
        Called as ``compute_function(8)`` by the ``%timeit`` line magic.

    Notes
    -----
    Reads the notebook-level global ``np_time``, so a cell that assigns it must
    have been run first. The ``%timeit`` magic means this only runs under IPython.
    """
    # No correctness check is made here; only the run time is compared.
    timeit_result = %timeit -o compute_function(8)
    run_time = timeit_result.average
    # The reference timing is always labelled "NumPy" in the printed sentence.
    compare_time(run_time, np_time, "NumPy")

In [12]:
%%cython -a
import numpy as np
from scipy.sparse import csr_matrix

def c1(nspins):
    """Untyped copy of the operator builder, compiled by the ``%%cython`` cell.

    Parameters
    ----------
    nspins : int
        Number of spin-1/2 nuclei. Each operator is 2**nspins x 2**nspins.

    Returns
    -------
    Lz_sparse : list of scipy.sparse.csr_matrix
        z-component operator of every spin.
    Lproduct : numpy.ndarray, dtype=object, shape (nspins, nspins)
        Entry ``[i, j]`` is the csr_matrix of
        ``Lx[i]*Lx[j] + Ly[i]*Ly[j] + Lz[i]*Lz[j]`` (matrix products).
    """
    # Single-spin operators (Pauli matrices scaled by 1/2). They must stay
    # np.matrix so that `*` inside the object-array np.dot below is a matrix
    # product.
    spin_x = np.matrix([[0, 1 / 2], [1 / 2, 0]])
    spin_y = np.matrix([[0, -1j / 2], [1j / 2, 0]])
    spin_z = np.matrix([[1 / 2, 0], [0, -1 / 2]])
    identity = np.matrix([[1, 0], [0, 1]])

    # One full-size Cartesian operator per spin.
    Lx = np.empty((nspins), dtype='object')
    Ly = np.empty((nspins), dtype='object')
    Lz = np.empty((nspins), dtype='object')

    for site in range(nspins):
        op_x = op_y = op_z = 1
        for position in range(nspins):
            # The spin operator sits at `site`; every other position gets the identity.
            if position == site:
                fx, fy, fz = spin_x, spin_y, spin_z
            else:
                fx = fy = fz = identity
            op_x = np.kron(op_x, fx)
            op_y = np.kron(op_y, fy)
            op_z = np.kron(op_z, fz)
        Lx[site], Ly[site], Lz[site] = op_x, op_y, op_z

    # Rows: x, y, z components; columns: spins.
    stacked = np.vstack((Lx, Ly, Lz)).real
    Lproduct = np.dot(stacked.T, stacked)
    Lz_sparse = list(map(csr_matrix, Lz))

    # Replace each dense product by its CSR form, in place.
    for row in range(nspins):
        for col in range(nspins):
            Lproduct[row, col] = csr_matrix(Lproduct[row, col])

    return Lz_sparse, Lproduct

In [13]:
# Time the untyped Cython build, c1(8), and report it relative to np_time.
print_report(c1)

428 ms ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
We are 1.0 times slower than the NumPy version.


Let's take a stab at adding static type declarations:

In [14]:
%%cython -a
import numpy as np
from scipy.sparse import csr_matrix

# np.complex_ was removed in NumPy 2.0; np.complex128 is the same type and
# exists in every NumPy version.
DTYPE = np.complex128

def c2(int nspins):
    """Operator builder with a C-typed argument and C-typed loop counters.

    Only ``nspins`` (C int) and the loop indices (Py_ssize_t) are typed; the
    operators themselves stay untyped Python objects that are passed to NumPy
    and SciPy calls.

    Parameters
    ----------
    nspins : int
        Number of spin-1/2 nuclei. Each operator is 2**nspins x 2**nspins.

    Returns
    -------
    Lz_sparse : list of scipy.sparse.csr_matrix
        z-component operator of every spin.
    Lproduct : numpy.ndarray, dtype=object, shape (nspins, nspins)
        Entry ``[i, j]`` is the csr_matrix of
        ``Lx[i]*Lx[j] + Ly[i]*Ly[j] + Lz[i]*Lz[j]`` (matrix products).
    """
    # Single-spin operators: the Pauli matrices scaled by 1/2. np.matrix makes
    # `*` a matrix product, which the object-array np.dot below relies on.
    sigma_x = np.matrix([[0, 1 / 2], [1 / 2, 0]])
    sigma_y = np.matrix([[0, -1j / 2], [1j / 2, 0]])
    sigma_z = np.matrix([[1 / 2, 0], [0, -1 / 2]])
    unit = np.matrix([[1, 0], [0, 1]])

    # The following empty arrays will be used to store the
    # Cartesian spin operators.
    Lx = np.empty((nspins), dtype='object')
    Ly = np.empty((nspins), dtype='object')
    Lz = np.empty((nspins), dtype='object')

    # C-level loop counters; the loop bodies still work on Python objects.
    cdef Py_ssize_t n, k, i, j
    for n in range(nspins):
        # Start from the scalar 1 and Kronecker-multiply one 2x2 factor per position.
        Lx[n] = 1
        Ly[n] = 1
        Lz[n] = 1
        for k in range(nspins):
            if k == n:  # Position of spin n: use its spin operator
                Lx[n] = np.kron(Lx[n], sigma_x)
                Ly[n] = np.kron(Ly[n], sigma_y)
                Lz[n] = np.kron(Lz[n], sigma_z)
            else:  # Any other position: use the identity
                Lx[n] = np.kron(Lx[n], unit)
                Ly[n] = np.kron(Ly[n], unit)
                Lz[n] = np.kron(Lz[n], unit)

    # Rows are the x, y and z components; columns are the spins.
    Lcol = np.vstack((Lx, Ly, Lz)).real
    Lrow = Lcol.T  # As opposed to sparse version of code, this works!
 
    # (nspins, 3) . (3, nspins): each entry sums the x, y and z products.
    Lproduct = np.dot(Lrow, Lcol)
    Lz_sparse = [csr_matrix(z) for z in Lz]

    # Convert every dense product to CSR in place.
    for i in range(nspins):
        for j in range(nspins):
            Lproduct[i, j] = csr_matrix(Lproduct[i, j])

    return Lz_sparse, Lproduct

In [15]:
# Time the partially typed Cython build, c2(8), and report it relative to np_time.
print_report(c2)

487 ms ± 46.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
We are 1.2 times slower than the NumPy version.
