# Timing for QobjEvo

Made by Eric Giguere

In [1]:
import qutip as qt
import numpy as np
from qutip import QobjEvo
%load_ext cython

In [2]:
# Dimension shared by every operator and test vector in this notebook
N = 10
# Annihilation, creation and identity operators of size N
destroy = qt.destroy(N)
create = qt.create(N)
Id = qt.qeye(N)
def exp_i(t, args):
    """Time-dependent coefficient exp(-1j*t); ``args`` is accepted but not used."""
    phase = -1j*t
    return np.exp(phase)
def cos_w(t, args):
    """Time-dependent coefficient cos(w*t), with the frequency read from ``args["w"]``."""
    omega = args["w"]
    return np.cos(omega*t)
# Sampling grids for the array-based coefficients:
# a uniform grid on [0, 10] and a log-spaced grid on [1e-3, 1e1]
tlist = np.linspace(0, 10, num=10000)
tlistlog = np.logspace(-3, 1, num=10000)

# State vectors and matrices as plain NumPy arrays.
# Real parts run 0, 0.5, 1, ...; every entry has imaginary part 0.5.
vec = np.arange(N)*.5+.5j
vec_super = np.arange(N**2)*.5+.5j
# Reshape with N rather than a literal 10 so the cell still works if N changes
mat_c = (np.arange(N**2)*.5+.5j).reshape((N,N))
# Fortran-ordered (column-major) copy of mat_c
mat_f = np.asfortranarray(mat_c*1.)

# Construct one QobjEvo for each coefficient format
# -- constant: bare operator, and single-element list
td_cte1 = QobjEvo(Id)
td_cte2 = QobjEvo([Id])

# -- coefficients given as Python functions
td_func = QobjEvo(
    [Id, [create, exp_i], [destroy, cos_w]],
    args={"w":2},
)
# -- coefficients given as strings
td_str = QobjEvo(
    [Id, [create, "exp(-1j*t)"], [destroy, "cos(w*t)"]],
    args={'w':2.},
)
# -- coefficients sampled on the uniform grid
td_array = QobjEvo(
    [Id, [create, np.exp(-1j*tlist)], [destroy, np.cos(2*tlist)]],
    tlist=tlist,
)
# -- coefficients sampled on the log-spaced grid
td_array_log = QobjEvo(
    [Id, [create, np.exp(-1j*tlistlog)], [destroy, np.cos(2*tlistlog)]],
    tlist=tlistlog,
)

# -- Liouvillian built from the function-based QobjEvo with td_cte1 as c_ops
td_super = qt.liouvillian(td_func, c_ops=td_cte1)

## Compilation and speed

Timing check for all coefficient formats.  
The old "get_compiled_call" kept the Python and Cython functions separated.  
Now, once compiled, the Cython method is used by the usual methods.

In [3]:
# Constant QobjEvo: time a plain call and a data=True call, first with the
# compiled flag cleared and then again after compile().
# The compiled version builds the scipy object from the sparse matrix if not used with data=True
print("Time for cte")
td_cte1.compiled = False
%timeit td_cte1(2)
%timeit td_cte1(2, data=True)
print("Time for compiled cte")
td_cte1.compile()
%timeit td_cte1(2)
%timeit td_cte1(2, data=True)

Time for cte
35.4 µs ± 377 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.89 µs ± 367 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Time for compiled cte
34.9 µs ± 125 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.68 µs ± 69.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [4]:
# String coefficients: the first two timings run with the compiled flag cleared,
# the last two after compile().
print("Time for str")
td_str.compiled = False
%timeit td_str(2)
%timeit td_str(2, data=True)
td_str.compile()
%timeit td_str(2)
%timeit td_str(2, data=True)

Time for str
528 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
169 µs ± 1.37 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
48.7 µs ± 382 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
9 µs ± 490 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
# Python-function coefficients: the first two timings run with the compiled flag cleared,
# the last two after compile().
print("Time for function")
td_func.compiled = False
%timeit td_func(2)
%timeit td_func(2, data=True)
td_func.compile()
%timeit td_func(2)
%timeit td_func(2, data=True)

Time for function
476 µs ± 27.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
135 µs ± 5.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
63.3 µs ± 3.89 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
13.7 µs ± 431 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [6]:
# Array coefficients on the uniform tlist: the first two timings run with the
# compiled flag cleared, the last two after compile().
print("Time for array")
td_array.compiled = False
%timeit td_array(2)
%timeit td_array(2, data=True)
td_array.compile()
%timeit td_array(2)
%timeit td_array(2, data=True)

Time for array
537 µs ± 7.25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
173 µs ± 2.39 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
48.3 µs ± 460 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.64 µs ± 145 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
# Array coefficients on the log-spaced tlistlog: the first two timings run with the
# compiled flag cleared, the last two after compile().
print("Time for array with variable time step")
td_array_log.compiled = False
%timeit td_array_log(2)
%timeit td_array_log(2, data=True)
td_array_log.compile()
%timeit td_array_log(2)
%timeit td_array_log(2, data=True)

Time for array with variable time step
537 µs ± 11.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
175 µs ± 1.62 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
48.9 µs ± 201 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.45 µs ± 121 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for rhs vs spmv

In [8]:
# Constant QobjEvo applied to vec. In order:
#   spmv on the evaluated data, spmv directly on Id.data (reference),
#   mul_vec with the compiled flag cleared, mul_vec after compile().
print("timing for rhs: cte")
td_cte1.compiled = False
%timeit qt.cy.spmv(td_cte1(2, data=True), vec)
%timeit qt.cy.spmv(Id.data, vec)
%timeit td_cte1.mul_vec(2,vec)
td_cte1.compile()
%timeit td_cte1.mul_vec(2,vec)

timing for rhs: cte
13.8 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.63 µs ± 43 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15.1 µs ± 142 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.94 µs ± 78.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
# String coefficients applied to vec: spmv on the evaluated data, then mul_vec
# with the compiled flag cleared, then mul_vec after compile().
print("timing for rhs: str")
td_str.compiled = False
%timeit qt.cy.spmv(td_str(2, data=True), vec)
%timeit td_str.mul_vec(2,vec)
td_str.compile()
%timeit td_str.mul_vec(2,vec)

timing for rhs: str
188 µs ± 876 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
189 µs ± 4.32 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.39 µs ± 83.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [10]:
print("timing for rhs: cte")
td_func.compiled = False
%timeit qt.cy.spmv(td_func(2, data=True), vec)
%timeit td_func.mul_vec(2,vec)
td_func.compile()
%timeit td_func.mul_vec(2,vec)

timing for rhs: cte
138 µs ± 1.15 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
139 µs ± 894 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.8 µs ± 59.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
print("timing for rhs: cte")
td_array.compiled = False
%timeit qt.cy.spmv(td_array(2, data=True), vec)
%timeit td_array.mul_vec(2,vec)
td_array.compile()
%timeit td_array.mul_vec(2,vec)

timing for rhs: cte
187 µs ± 1.83 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
188 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.38 µs ± 42.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [12]:
print("timing for rhs: cte")
td_array_log.compiled = False
%timeit qt.cy.spmv(td_array_log(2, data=True), vec)
%timeit td_array_log.mul_vec(2,vec)
td_array_log.compile()
%timeit td_array_log.mul_vec(2,vec)

timing for rhs: cte
187 µs ± 2.51 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
190 µs ± 2.83 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.51 µs ± 112 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for spmm

In [13]:
# Matrix product with the C-ordered mat_c: evaluated data times the matrix, then
# mul_mat with the compiled flag cleared, then mul_mat after a default compile().
print("timing for mul_mat: c format sparce")
td_str.compiled = False
%timeit td_str(2, data=True) * mat_c
%timeit td_str.mul_mat(2,mat_c)
td_str.compile()
%timeit td_str.mul_mat(2,mat_c)

timing for mul_mat: c format sparce
200 µs ± 16.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
214 µs ± 21.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.35 µs ± 390 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [14]:
# Same C-ordered input as the previous cell; only the last timing differs,
# because the object is compiled with dense=True.
print("timing for mul_mat: c format dense")
td_str.compiled = False
%timeit td_str(2, data=True) * mat_c
%timeit td_str.mul_mat(2,mat_c)
td_str.compile(dense=True)
%timeit td_str.mul_mat(2,mat_c)

timing for mul_mat: c format dense
184 µs ± 1.98 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
186 µs ± 8.07 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
9.94 µs ± 361 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [15]:
# Matrix product with the Fortran-ordered mat_f: evaluated data times the matrix, then
# mul_mat with the compiled flag cleared, then mul_mat after a default compile().
print("timing for mul_mat: f format sparce")
td_str.compiled = False
%timeit td_str(2, data=True) * mat_f
%timeit td_str.mul_mat(2,mat_f)
td_str.compile()
%timeit td_str.mul_mat(2,mat_f)

timing for mul_mat: f format sparce
184 µs ± 409 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
188 µs ± 1.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
5.59 µs ± 166 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [16]:
# Same Fortran-ordered input as the previous cell; only the last timing differs,
# because the object is compiled with dense=True.
print("timing for mul_mat: f format dense")
td_str.compiled = False
%timeit td_str(2, data=True) * mat_f
%timeit td_str.mul_mat(2,mat_f)
td_str.compile(dense=True)
%timeit td_str.mul_mat(2,mat_f)

timing for mul_mat: f format dense
183 µs ± 433 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
193 µs ± 8.92 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
12.6 µs ± 813 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for expect

In [17]:
# Constant QobjEvo: cy_expect_psi on the evaluated data, then expect() with the
# compiled flag cleared, then expect() after compile().
print("timing for expect: cte")
td_cte1.compiled = False
%timeit qt.cy.cy_expect_psi(td_cte1(2, data=True), vec, 0)
%timeit td_cte1.expect(2,vec,0)
td_cte1.compile()
%timeit td_cte1.expect(2,vec,0)

timing for expect: cte
13.2 µs ± 576 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
15.4 µs ± 782 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
5.4 µs ± 531 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [18]:
# String coefficients: cy_expect_psi on the evaluated data, then expect() with the
# compiled flag cleared, then expect() after compile().
print("timing for expect: str")
td_str.compiled = False
%timeit qt.cy.cy_expect_psi(td_str(2, data=True), vec, 0)
%timeit td_str.expect(2,vec,0)
td_str.compile()
%timeit td_str.expect(2,vec,0)

timing for expect: str
193 µs ± 22.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
213 µs ± 24.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.99 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [19]:
# Python-function coefficients: cy_expect_psi on the evaluated data, then expect()
# with the compiled flag cleared, then expect() after compile().
print("timing for expect: func")
td_func.compiled = False
%timeit qt.cy.cy_expect_psi(td_func(2, data=True), vec, 0)
%timeit td_func.expect(2,vec,0)
td_func.compile()
%timeit td_func.expect(2,vec,0)

timing for expect: func
147 µs ± 7.48 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
150 µs ± 14 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
10.6 µs ± 816 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
# Array coefficients (uniform tlist): cy_expect_psi on the evaluated data, then
# expect() with the compiled flag cleared, then expect() after compile().
print("timing for expect: array")
td_array.compiled = False
%timeit qt.cy.cy_expect_psi(td_array(2, data=True), vec, 0)
%timeit td_array.expect(2,vec,0)
td_array.compile()
%timeit td_array.expect(2,vec,0)

timing for expect: array
184 µs ± 3.32 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
184 µs ± 557 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.8 µs ± 15.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [21]:
# Array coefficients (log-spaced tlistlog): cy_expect_psi on the evaluated data, then
# expect() with the compiled flag cleared, then expect() after compile().
print("timing for expect: array logscale")
td_array_log.compiled = False
%timeit qt.cy.cy_expect_psi(td_array_log(2, data=True), vec, 0)
%timeit td_array_log.expect(2,vec,0)
td_array_log.compile()
%timeit td_array_log.expect(2,vec,0)

timing for expect: array logscale
184 µs ± 4.65 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
187 µs ± 3.57 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.73 µs ± 17.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [22]:
# Liouvillian case: td_super was built from td_func, and vec_super has N**2 entries.
# cy_expect_rho_vec on the evaluated data, then expect() with the compiled flag
# cleared, then expect() after compile().
print("timing for expect_rho: func")
td_super.compiled = False
%timeit qt.cy.cy_expect_rho_vec(td_super(2, data=True), vec_super, 0)
%timeit td_super.expect(2,vec_super,0)
td_super.compile()
%timeit td_super.expect(2,vec_super,0)

timing for expect_rho: func
145 µs ± 2.79 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
146 µs ± 5.71 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.68 µs ± 674 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing with_args

In [23]:
# QobjEvo with a Python-function coefficient (cos_w) and stored args w=1
td_args = QobjEvo([Id,[destroy,cos_w]],args={"w":1})

In [24]:
# Compare with_args(t, new_args) against a plain call, with and without data=True.
# The first four timings run before compile(), the last four after.
# NOTE(review): with_args presumably evaluates with {"w":2} in place of the stored args — confirm.
%timeit td_args.with_args(2,{"w":2})
%timeit td_args(2)
%timeit td_args.with_args(2,{"w":2},data=True)
%timeit td_args(2,data=True)
td_args.compile()
%timeit td_args.with_args(2,{"w":2})
%timeit td_args(2)
%timeit td_args.with_args(2,{"w":2},data=True)
%timeit td_args(2,data=True)

297 µs ± 19 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
259 µs ± 17.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
72.3 µs ± 6.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
69 µs ± 4.55 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
72.1 µs ± 3.99 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
57.7 µs ± 3.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
14.6 µs ± 211 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.3 µs ± 388 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [25]:
# Rebinds td_args: same structure, but with a string coefficient and stored args w=1
td_args = QobjEvo([Id,[destroy,"cos(w*t)"]],args={"w":1})

In [26]:
# Same with_args comparison, now for the string-coefficient td_args.
# The first four timings run before compile(), the last four after.
%timeit td_args.with_args(2,{"w":2})
%timeit td_args(2)
%timeit td_args.with_args(2,{"w":2},data=True)
%timeit td_args(2,data=True)
td_args.compile()
%timeit td_args.with_args(2,{"w":2})
%timeit td_args(2)
%timeit td_args.with_args(2,{"w":2},data=True)
%timeit td_args(2,data=True)

297 µs ± 9.14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
281 µs ± 683 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
94.8 µs ± 4.45 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
92.8 µs ± 5.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
84.7 µs ± 108 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
50.7 µs ± 3.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
31.4 µs ± 239 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.43 µs ± 538 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing with_state

In [27]:
def coeff_state(t, psi, args):
    """State-dependent coefficient: ``np.max(psi)`` scaled by ``args["w"]``; ``t`` is unused."""
    peak = np.max(psi)
    return peak*args["w"]
# coeff_state takes the state as an extra argument, so this QobjEvo is timed
# through with_state(t, state), once before and once after compile().
td_state = QobjEvo([Id, [destroy, coeff_state]],args={"w":1})
%timeit td_state.with_state(2,vec)
td_state.compile()
%timeit td_state.with_state(2,vec)

282 µs ± 30 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
62.9 µs ± 2.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
