# Timing for QobjEvo

Made by Eric Giguere

In [1]:
import qutip as qt
import numpy as np
from qutip import QobjEvo
%load_ext cython

In [2]:
# Hilbert-space dimension shared by every operator and state below.
N = 10
# Ladder operators and the identity, all of dimension N.
destroy = qt.destroy(N)
create = qt.create(N)
Id = qt.qeye(N)
def exp_i(t, args):
    """Return the complex phase factor exp(-i*t).

    ``args`` is part of the coefficient-function signature but is not read.
    """
    phase = -1j*t
    return np.exp(phase)
def cos_w(t, args):
    """Return cos(w*t), where the frequency ``w`` is read from ``args["w"]``."""
    frequency = args["w"]
    return np.cos(frequency*t)
# Sample times for the array-based coefficients: one evenly spaced grid and
# one logarithmically spaced grid, each with 10000 points.
tlist = np.linspace(0,10,10000)
tlistlog = np.logspace(-3,1,10000)

# States and matrices as plain numpy arrays
vec = np.arange(N)*.5+.5j
vec_super = np.arange(N**2)*.5+.5j
# The shape follows N (it was hard-coded to (10, 10)), so changing N above
# no longer makes this reshape fail.
mat_c = (np.arange(N**2)*.5+.5j).reshape((N, N))
# Fortran-ordered array holding the same values as mat_c.
mat_f = np.asfortranarray(mat_c*1.)

# Build one QobjEvo for each kind of coefficient specification.

# Constant operator: passed directly, and wrapped in a list.
td_cte1 = QobjEvo(Id)
td_cte2 = QobjEvo([Id])

# Coefficients given as Python functions.
td_func = QobjEvo(
    [Id, [create, exp_i], [destroy, cos_w]],
    args={"w":2},
)
# Coefficients given as strings.
td_str = QobjEvo(
    [Id, [create, "exp(-1j*t)"], [destroy, "cos(w*t)"]],
    args={'w':2.},
)
# Coefficients given as arrays sampled on the evenly spaced grid.
td_array = QobjEvo(
    [Id, [create, np.exp(-1j*tlist)], [destroy, np.cos(2*tlist)]],
    tlist=tlist,
)
# Coefficients given as arrays sampled on the log-spaced grid.
td_array_log = QobjEvo(
    [Id, [create, np.exp(-1j*tlistlog)], [destroy, np.cos(2*tlistlog)]],
    tlist=tlistlog,
)

# Liouvillian built from the function-based object and the constant one.
td_super = qt.liouvillian(td_func, c_ops=td_cte1)

## Compilation and speed

Timing check for all coeff formats.  
These cells still follow the old "get_compiled_call" layout, which kept the Python and Cython functions separate.  
Now, once the object is compiled, the usual methods use the Cython implementation.

In [3]:
# Time evaluating the constant QobjEvo at t=2, before and after compile().
# The compiled version builds the scipy object from the sparse matrix unless
# it is called with data=True.
print("Time for cte")
# Mark the object as not compiled before taking the baseline timings.
td_cte1.compiled = False
%timeit td_cte1(2)
%timeit td_cte1(2, data=True)
print("Time for compiled cte")
# Compile, then repeat the same two calls for comparison.
td_cte1.compile()
%timeit td_cte1(2)
%timeit td_cte1(2, data=True)

Time for cte
52.3 µs ± 704 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
11.1 µs ± 208 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Time for compiled cte
52.5 µs ± 400 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
11.1 µs ± 87.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [4]:
# Time evaluating the string-coefficient QobjEvo at t=2, with and without
# data=True, before and after compile().
print("Time for str")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
%timeit td_str(2)
%timeit td_str(2, data=True)
# Compile, then repeat the same two calls for comparison.
td_str.compile()
%timeit td_str(2)
%timeit td_str(2, data=True)

Time for str
596 µs ± 5.69 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
188 µs ± 708 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
68.6 µs ± 65.2 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
12.9 µs ± 33 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
# Time evaluating the function-coefficient QobjEvo at t=2, with and without
# data=True, before and after compile().
print("Time for function")
# Mark the object as not compiled before taking the baseline timings.
td_func.compiled = False
%timeit td_func(2)
%timeit td_func(2, data=True)
# Compile, then repeat the same two calls for comparison.
td_func.compile()
%timeit td_func(2)
%timeit td_func(2, data=True)

Time for function
532 µs ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
146 µs ± 2.63 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
77.9 µs ± 2.03 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
20.3 µs ± 144 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
# Time evaluating the array-coefficient QobjEvo (evenly spaced tlist) at t=2,
# with and without data=True, before and after compile().
print("Time for array")
# Mark the object as not compiled before taking the baseline timings.
td_array.compiled = False
%timeit td_array(2)
%timeit td_array(2, data=True)
# Compile, then repeat the same two calls for comparison.
td_array.compile()
%timeit td_array(2)
%timeit td_array(2, data=True)

Time for array
576 µs ± 1.56 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
186 µs ± 2.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
67.9 µs ± 190 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
12.4 µs ± 209 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
# Time evaluating the array-coefficient QobjEvo built on the log-spaced tlist
# at t=2, with and without data=True, before and after compile().
print("Time for array with variable time step")
# Mark the object as not compiled before taking the baseline timings.
td_array_log.compiled = False
%timeit td_array_log(2)
%timeit td_array_log(2, data=True)
# Compile, then repeat the same two calls for comparison.
td_array_log.compile()
%timeit td_array_log(2)
%timeit td_array_log(2, data=True)

Time for array with variable time step
577 µs ± 10.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
184 µs ± 446 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
68 µs ± 86.1 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
12.5 µs ± 53.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for rhs vs spmv

In [8]:
# Compare qt.cy.spmv on explicitly evaluated data with QobjEvo.mul_vec for the
# constant operator.
print("timing for rhs: cte")
# Mark the object as not compiled before taking the baseline timings.
td_cte1.compiled = False
# Reference 1: evaluate the data at t=2, then apply spmv to vec.
%timeit qt.cy.spmv(td_cte1(2, data=True), vec)
# Reference 2: spmv on the identity's data directly, with no QobjEvo call.
%timeit qt.cy.spmv(Id.data, vec)
# mul_vec before compiling ...
%timeit td_cte1.mul_vec(2,vec)
# ... and after.
td_cte1.compile()
%timeit td_cte1.mul_vec(2,vec)

timing for rhs: cte
19.4 µs ± 132 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
5.78 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
23.6 µs ± 869 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
5.89 µs ± 20.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
# Compare qt.cy.spmv on explicitly evaluated data with QobjEvo.mul_vec for the
# string-coefficient object.
print("timing for rhs: str")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2, then apply spmv to vec.
%timeit qt.cy.spmv(td_str(2, data=True), vec)
# mul_vec before compiling ...
%timeit td_str.mul_vec(2,vec)
# ... and after.
td_str.compile()
%timeit td_str.mul_vec(2,vec)

timing for rhs: str
216 µs ± 16.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
216 µs ± 14.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.74 µs ± 201 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [10]:
print("timing for rhs: cte")
td_func.compiled = False
%timeit qt.cy.spmv(td_func(2, data=True), vec)
%timeit td_func.mul_vec(2,vec)
td_func.compile()
%timeit td_func.mul_vec(2,vec)

timing for rhs: cte
155 µs ± 472 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
158 µs ± 139 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
16 µs ± 44.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
print("timing for rhs: cte")
td_array.compiled = False
%timeit qt.cy.spmv(td_array(2, data=True), vec)
%timeit td_array.mul_vec(2,vec)
td_array.compile()
%timeit td_array.mul_vec(2,vec)

timing for rhs: cte
198 µs ± 2.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
200 µs ± 2.66 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.13 µs ± 25.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [12]:
print("timing for rhs: cte")
td_array_log.compiled = False
%timeit qt.cy.spmv(td_array_log(2, data=True), vec)
%timeit td_array_log.mul_vec(2,vec)
td_array_log.compile()
%timeit td_array_log.mul_vec(2,vec)

timing for rhs: cte
200 µs ± 4.15 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
201 µs ± 4.74 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.2 µs ± 54.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for spmm

In [13]:
# Operator-times-matrix timing with the C-ordered matrix mat_c and the default
# compile() options.
print("timing for mul_mat: c format sparce")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2 and multiply it with mat_c using `*`.
%timeit td_str(2, data=True) * mat_c
# mul_mat before compiling ...
%timeit td_str.mul_mat(2,mat_c)
# ... and after.
td_str.compile()
%timeit td_str.mul_mat(2,mat_c)

timing for mul_mat: c format sparce
204 µs ± 4.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
223 µs ± 19.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
11.3 µs ± 673 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [14]:
# Operator-times-matrix timing with the C-ordered matrix mat_c, compiling with
# dense=True this time.
print("timing for mul_mat: c format dense")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2 and multiply it with mat_c using `*`.
%timeit td_str(2, data=True) * mat_c
# mul_mat before compiling ...
%timeit td_str.mul_mat(2,mat_c)
# ... and after compiling with the dense option.
td_str.compile(dense=True)
%timeit td_str.mul_mat(2,mat_c)

timing for mul_mat: c format dense
206 µs ± 5.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
211 µs ± 5.02 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
14.3 µs ± 89.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [15]:
# Operator-times-matrix timing with the Fortran-ordered matrix mat_f and the
# default compile() options.
print("timing for mul_mat: f format sparce")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2 and multiply it with mat_f using `*`.
%timeit td_str(2, data=True) * mat_f
# mul_mat before compiling ...
%timeit td_str.mul_mat(2,mat_f)
# ... and after.
td_str.compile()
%timeit td_str.mul_mat(2,mat_f)

timing for mul_mat: f format sparce
214 µs ± 21.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
231 µs ± 22.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
13 µs ± 48.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [16]:
# Operator-times-matrix timing with the Fortran-ordered matrix mat_f,
# compiling with dense=True this time.
print("timing for mul_mat: f format dense")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2 and multiply it with mat_f using `*`.
%timeit td_str(2, data=True) * mat_f
# mul_mat before compiling ...
%timeit td_str.mul_mat(2,mat_f)
# ... and after compiling with the dense option.
td_str.compile(dense=True)
%timeit td_str.mul_mat(2,mat_f)

timing for mul_mat: f format dense
210 µs ± 3.47 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
222 µs ± 9.36 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
14.4 µs ± 38.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing for expect

In [17]:
# Compare qt.cy.cy_expect_psi on explicitly evaluated data with QobjEvo.expect
# for the constant operator.
print("timing for expect: cte")
# Mark the object as not compiled before taking the baseline timings.
td_cte1.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_psi with vec.
%timeit qt.cy.cy_expect_psi(td_cte1(2, data=True), vec, 0)
# expect before compiling ...
%timeit td_cte1.expect(2,vec,0)
# ... and after.
td_cte1.compile()
%timeit td_cte1.expect(2,vec,0)

timing for expect: cte
16 µs ± 31.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
23 µs ± 2.09 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.92 µs ± 27.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [18]:
# Compare qt.cy.cy_expect_psi on explicitly evaluated data with QobjEvo.expect
# for the string-coefficient object.
print("timing for expect: str")
# Mark the object as not compiled before taking the baseline timings.
td_str.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_psi with vec.
%timeit qt.cy.cy_expect_psi(td_str(2, data=True), vec, 0)
# expect before compiling ...
%timeit td_str.expect(2,vec,0)
# ... and after.
td_str.compile()
%timeit td_str.expect(2,vec,0)

timing for expect: str
212 µs ± 19.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
216 µs ± 15.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
10.7 µs ± 16.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [19]:
# Compare qt.cy.cy_expect_psi on explicitly evaluated data with QobjEvo.expect
# for the function-coefficient object.
print("timing for expect: func")
# Mark the object as not compiled before taking the baseline timings.
td_func.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_psi with vec.
%timeit qt.cy.cy_expect_psi(td_func(2, data=True), vec, 0)
# expect before compiling ...
%timeit td_func.expect(2,vec,0)
# ... and after.
td_func.compile()
%timeit td_func.expect(2,vec,0)

timing for expect: func
154 µs ± 1.85 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
161 µs ± 2.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
18.1 µs ± 108 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
# Compare qt.cy.cy_expect_psi on explicitly evaluated data with QobjEvo.expect
# for the array-coefficient object (evenly spaced tlist).
print("timing for expect: array")
# Mark the object as not compiled before taking the baseline timings.
td_array.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_psi with vec.
%timeit qt.cy.cy_expect_psi(td_array(2, data=True), vec, 0)
# expect before compiling ...
%timeit td_array.expect(2,vec,0)
# ... and after.
td_array.compile()
%timeit td_array.expect(2,vec,0)

timing for expect: array
200 µs ± 15.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
204 µs ± 5.04 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
10.3 µs ± 102 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [21]:
# Compare qt.cy.cy_expect_psi on explicitly evaluated data with QobjEvo.expect
# for the array-coefficient object built on the log-spaced tlist.
print("timing for expect: array logscale")
# Mark the object as not compiled before taking the baseline timings.
td_array_log.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_psi with vec.
%timeit qt.cy.cy_expect_psi(td_array_log(2, data=True), vec, 0)
# expect before compiling ...
%timeit td_array_log.expect(2,vec,0)
# ... and after.
td_array_log.compile()
%timeit td_array_log.expect(2,vec,0)

timing for expect: array logscale
201 µs ± 16.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
212 µs ± 34.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
10.4 µs ± 65.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [22]:
# Compare qt.cy.cy_expect_rho_vec on explicitly evaluated data with
# QobjEvo.expect for the Liouvillian td_super, using the N**2-element
# vector vec_super.
print("timing for expect_rho: func")
# Mark the object as not compiled before taking the baseline timings.
td_super.compiled = False
# Reference: evaluate the data at t=2, then call cy_expect_rho_vec.
%timeit qt.cy.cy_expect_rho_vec(td_super(2, data=True), vec_super, 0)
# expect before compiling ...
%timeit td_super.expect(2,vec_super,0)
# ... and after.
td_super.compile()
%timeit td_super.expect(2,vec_super,0)

timing for expect_rho: func
161 µs ± 392 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
167 µs ± 2.61 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
15.2 µs ± 193 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing with_args

In [23]:
# Function-coefficient object whose default frequency is w=1; the timing cell
# that follows also calls it with w overridden.
td_args = QobjEvo(
    [Id, [destroy, cos_w]],
    args={"w":1},
)

In [25]:
# Cost of passing args at call time versus using the stored args, for the
# function-coefficient td_args. The four calls are: with and without an
# args override, each with and without data=True.
%timeit td_args(2,args={"w":2})
%timeit td_args(2)
%timeit td_args(2,args={"w":2},data=True)
%timeit td_args(2,data=True)
# Compile, then repeat the same four calls for comparison.
td_args.compile()
%timeit td_args(2,args={"w":2})
%timeit td_args(2)
%timeit td_args(2,args={"w":2},data=True)
%timeit td_args(2,data=True)

295 µs ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
291 µs ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
82.7 µs ± 1.47 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
79.7 µs ± 1.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
300 µs ± 17.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
73.4 µs ± 393 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
83.3 µs ± 2.86 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
16.6 µs ± 21.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [26]:
# Rebind td_args to the string-coefficient equivalent (default w=1) so the
# same args timings can be repeated for the string format.
td_args = QobjEvo(
    [Id, [destroy, "cos(w*t)"]],
    args={"w":1},
)

In [27]:
# Cost of passing args at call time versus using the stored args, for the
# string-coefficient td_args. The four calls are: with and without an
# args override, each with and without data=True.
%timeit td_args(2,args={"w":2})
%timeit td_args(2)
%timeit td_args(2,args={"w":2},data=True)
%timeit td_args(2,data=True)
# Compile, then repeat the same four calls for comparison.
td_args.compile()
%timeit td_args(2,args={"w":2})
%timeit td_args(2)
%timeit td_args(2,args={"w":2},data=True)
%timeit td_args(2,data=True)

330 µs ± 2.81 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
326 µs ± 1.49 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
106 µs ± 860 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
103 µs ± 1.25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
328 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
67.3 µs ± 174 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
104 µs ± 2.35 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
12.1 µs ± 214 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Timing with_state

In [29]:
def coeff_state(t, args):
    """Return the maximum of ``args["vec"]`` scaled by ``args["w"]``.

    The time ``t`` is accepted but not used.
    """
    peak = np.max(args["vec"])
    return peak*args["w"]
# Time a coefficient that depends on the state passed at call time.
# NOTE(review): the "vec=vec" key is presumably QobjEvo's dynamic-argument
# syntax ("<name>=vec"), which exposes the state as args["vec"] to
# coeff_state. Confirm against the QuTiP documentation.
td_state = QobjEvo([Id, [destroy, coeff_state]],args={"w":1,"vec=vec":vec})
%timeit td_state(2, state=vec, data=True)
# Compile, then repeat the same call for comparison.
td_state.compile()
%timeit td_state(2, state=vec, data=True)

89.2 µs ± 235 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
29 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
