# Profiling

In [33]:
import time
from line_profiler import LineProfiler

In [34]:
def aux(i):
    i = i+1
    i = i**2
    time.sleep(0.001)
    return i

def fun(x):
    time.sleep(x)
    a = [i for i in range(100)]
    for i in range(100):
        a = aux(i)
    return 1

In [41]:
%time fun(1)

CPU times: user 826 µs, sys: 984 µs, total: 1.81 ms
Wall time: 1.11 s


1

In [None]:
%timeit fun(1)

In [45]:
%%timeit 
fun(1)
fun(0)

1.21 s ± 123 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Line Profiler

In [35]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


Profile `fun` and `aux` when running statement `fun(1)` and save the results in `profile_fun.txt`.

Run `%lprun?` for more options.

In [49]:
%lprun -f fun -f aux -T profile_fun.txt fun(1) 


*** Profile printout saved to text file 'profile_fun.txt'. 


Timer unit: 1e-09 s

Total time: 0.105616 s
File: /tmp/ipykernel_4135237/28764885.py
Function: aux at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def aux(i):
     2       100      27909.0    279.1      0.0      i = i+1
     3       100      52541.0    525.4      0.0      i = i**2
     4       100  105508301.0 1055083.0     99.9      time.sleep(0.001)
     5       100      27345.0    273.4      0.0      return i

Total time: 1.10703 s
File: /tmp/ipykernel_4135237/28764885.py
Function: fun at line 7

Line #      Hits         Time  Per Hit   % Time  Line Contents
     7                                           def fun(x):
     8         1 1001065412.0 1001065412.0     90.4      time.sleep(x)
     9         1      43150.0  43150.0      0.0      a=[i for i in range(100)]
    10       100      25756.0    257.6      0.0      for i in range(100):
    11       100  105894974.0 1058949.7      9.6          a=aux(i)
    1

Same behaviour using code

In [39]:
lp = LineProfiler()
lp_wrapper = lp(fun)
lp_wrapper(2)
lp.print_stats()

Timer unit: 1e-09 s

Total time: 2.10759 s
File: /tmp/ipykernel_4135237/28764885.py
Function: fun at line 7

Line #      Hits         Time  Per Hit   % Time  Line Contents
     7                                           def fun(x):
     8         1 2002027869.0 2002027869.0     95.0      time.sleep(x)
     9         1      32962.0  32962.0      0.0      a=[i for i in range(100)]
    10       100      29749.0    297.5      0.0      for i in range(100):
    11       100  105503168.0 1055031.7      5.0          a=aux(i)
    12         1        224.0    224.0      0.0      return 1



## Examples
Here are some examples of profiling.

### Example 1
One vs two lines

In [46]:
import numpy as np

In [47]:
%%timeit
A = np.random.uniform(size=(50,50,50))
A = A+1

1.3 ms ± 16.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [48]:
%%timeit
A = np.random.uniform(size=(50,50,50))+1

1.16 ms ± 10.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


### Example 2

In [23]:
N=10000

In [25]:
%%timeit
l=np.zeros(N)
for i in range(N):
    l=np.random.uniform()

23.9 ms ± 1.81 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
%%timeit
l=[np.random.uniform() for i in range(N)]

21.8 ms ± 7.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Example 3

In [70]:
i,j,k = (100,200,300)
A = np.random.uniform(size=(i,j,k))
B = np.random.uniform(size=(i,1,k))
C = np.random.uniform(size=(i,j,1))
D = np.random.uniform(size=(i,1,1))

In [71]:
%%timeit
np.tile(B-1,(1,j,1))

10.8 ms ± 363 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [72]:
%%timeit
np.tile(B,(1,j,1))-1

23.7 ms ± 974 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [68]:
%%timeit
np.minimum.reduce([A,
                   np.tile(B,(1,j,1)),
                   np.tile(C,(1,1,k)),
                   np.tile(D,(1,j,k))])

139 ms ± 6.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [69]:
%%timeit
np.array([A,
          np.tile(B,(1,j,1)),
          np.tile(C,(1,1,k)),
          np.tile(D,(1,j,k))]).min(axis=0)

156 ms ± 17.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [76]:
A=np.random.uniform(size=(2,3,4))
A

array([[[0.61450316, 0.95055704, 0.43870956, 0.64272652],
        [0.48077156, 0.72807989, 0.39790626, 0.91342951],
        [0.04958972, 0.29605232, 0.75903205, 0.45739163]],

       [[0.23427534, 0.21429707, 0.96865307, 0.43771724],
        [0.43174621, 0.52261765, 0.33950901, 0.11758247],
        [0.98283437, 0.6335777 , 0.24800674, 0.99446312]]])

In [146]:
p_grid = np.array([10,20,30])
w_grid = np.array([100,200,300,400])

In [155]:
idx = A.reshape(A.shape[0],-1).argmax(axis=-1)
idx = np.array(np.unravel_index(idx,(3,4)))
p_grid[idx[0]],w_grid[idx[1]]

(array([10, 30]), array([200, 400]))

array([[0, 2],
       [1, 3]], dtype=int64)

In [142]:
np.array(np.unravel_index([22,32,3,12],(7,6))).astype(int)

array([[3, 5, 0, 2],
       [4, 2, 3, 0]])

In [109]:
np.unravel_index(idx,A.shape[-2:])

(array([0, 2], dtype=int64), array([1, 3], dtype=int64))

### Example 4

In [6]:
import numpy as np
class Foo():
    def __init__(self):
        self.A = np.random.uniform(size=(100,100,100))
    
    def foo1(self, n:int=1000):
        for _ in range(n):
            B = self.A
    def foo2(self, n:int=1000):
        A = self.A
        for _ in range(n):
            B = A

In [7]:
%%timeit
Foo().foo1()

11.2 ms ± 2.78 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
%%timeit
Foo().foo1()

11.1 ms ± 2.17 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
