# References

* [Numba by Loic Gouarin](https://github.com/gouarin/cours_numba_2017)
* [Numba Documentation](http://numba.pydata.org/numba-doc/latest/index.html)

<img src="https://cdn.dribbble.com/users/915978/screenshots/3034118/numba_1x.jpg" alt="Drawing" style="width: 40%;"/>



In [24]:
import numpy as np

In [25]:
from numba import jit

@jit
def sum(a, b):
    return a + b

In [26]:
print(sum(1, 2))
print(sum(1j, 2))

3
(2+1j)


In [27]:
x = np.random.rand(10)
y = np.random.rand(10)
print(sum(x, y))

[ 0.60311027  1.22530298  1.82637806  0.46345741  0.7607347   0.94169324
  0.69141898  0.53092071  0.78677599  1.60992154]


In [28]:
x = np.random.rand(10000000)

In [29]:
%timeit x.sum()

4.12 ms ± 305 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
@jit
def numba_sum(x):
    res= 0
    for i in range(x.size):
        res += x[i]
    return res

In [31]:
%timeit numba_sum(x)

11.9 ms ± 624 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [32]:
@jit
def jit_sum(a, b):
    return a + b

In [33]:
jit_sum.inspect_types() # jit_sum has not been compiled

In [34]:
jit_sum(1, 2) # call it once with ints
jit_sum.inspect_types()

jit_sum (int64, int64)
--------------------------------------------------------------------------------
# File: <ipython-input-32-d23be499386c>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: int64
    #   b = arg(1, name=b)  :: int64
    #   $0.3 = a + b  :: int64
    #   $0.4 = cast(value=$0.3)  :: int64
    #   return $0.4

    return a + b




In [35]:
jit_sum(1., 2.) # call it once with doubles
jit_sum.inspect_types()

jit_sum (int64, int64)
--------------------------------------------------------------------------------
# File: <ipython-input-32-d23be499386c>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: int64
    #   b = arg(1, name=b)  :: int64
    #   $0.3 = a + b  :: int64
    #   $0.4 = cast(value=$0.3)  :: int64
    #   return $0.4

    return a + b


jit_sum (float64, float64)
--------------------------------------------------------------------------------
# File: <ipython-input-32-d23be499386c>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: float64
    #   b = arg(1, name=b)  :: float64
    #   $0.3 = a + b  :: float64
    #   $0.4 = cast(value=$0.3)  :: float64
    #   return $0.4

    return a + b




In [36]:
for k, v in jit_sum.inspect_llvm().items():
    print(v)

; ModuleID = 'jit_sum'
source_filename = "<string>"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin15.3.0"

@.const.jit_sum = internal constant [8 x i8] c"jit_sum\00"
@".const.Fatal error: missing _dynfunc.Closure" = internal constant [38 x i8] c"Fatal error: missing _dynfunc.Closure\00"
@PyExc_RuntimeError = external global i8
@".const.missing Environment" = internal constant [20 x i8] c"missing Environment\00"

; Function Attrs: norecurse nounwind
define i32 @"_ZN8__main__12jit_sum$2417Exx"(i64* noalias nocapture %retptr, { i8*, i32 }** noalias nocapture readnone %excinfo, i8* noalias nocapture readnone %env, i64 %arg.a, i64 %arg.b) local_unnamed_addr #0 {
entry:
  %.15 = add nsw i64 %arg.b, %arg.a
  store i64 %.15, i64* %retptr, align 8
  ret i32 0
}

define i8* @"_ZN7cpython8__main__12jit_sum$2417Exx"(i8* %py_closure, i8* %py_args, i8* nocapture readnone %py_kws) local_unnamed_addr {
entry:
  %.5 = alloca i8*, align 8
  %.6 = alloca 

In [37]:
for k, v in jit_sum.inspect_asm().items():
    print(v)

	.section	__TEXT,__text,regular,pure_instructions
	.macosx_version_min 10, 11
	.globl	__ZN8__main__12jit_sum$2417Exx
	.p2align	4, 0x90
__ZN8__main__12jit_sum$2417Exx:
	addq	%r8, %rcx
	movq	%rcx, (%rdi)
	xorl	%eax, %eax
	retq

	.globl	__ZN7cpython8__main__12jit_sum$2417Exx
	.p2align	4, 0x90
__ZN7cpython8__main__12jit_sum$2417Exx:
	.cfi_startproc
	pushq	%r15
Lcfi0:
	.cfi_def_cfa_offset 16
	pushq	%r14
Lcfi1:
	.cfi_def_cfa_offset 24
	pushq	%r12
Lcfi2:
	.cfi_def_cfa_offset 32
	pushq	%rbx
Lcfi3:
	.cfi_def_cfa_offset 40
	subq	$24, %rsp
Lcfi4:
	.cfi_def_cfa_offset 64
Lcfi5:
	.cfi_offset %rbx, -40
Lcfi6:
	.cfi_offset %r12, -32
Lcfi7:
	.cfi_offset %r14, -24
Lcfi8:
	.cfi_offset %r15, -16
	movq	%rdi, %rbx
	movabsq	$_.const.jit_sum, %r10
	movabsq	$_PyArg_UnpackTuple, %r11
	leaq	16(%rsp), %r8
	leaq	8(%rsp), %r9
	movl	$2, %edx
	movl	$2, %ecx
	xorl	%eax, %eax
	movq	%rsi, %rdi
	movq	%r10, %rsi
	callq	*%r11
	testl	%eax, %eax
	je	LBB1_1
	testq	%rbx, %rbx
	je	LBB1_15
	cmpq	$0, 24(%rbx)
	je	LBB1_5
	movq	16

In [38]:
jit_sum.py_func(1, 2) # call origin python function without numba process

3

### Types coercion

In [39]:
@jit(['int32[:](int32[:], int32[:])',
      'int32(int32, int32)'])
def product(a, b):
    return a*b

In [40]:
product(2, 3)

6

In [41]:
product(2.2, 3.2)

6

In [42]:
a = np.arange(10, dtype=np.int32)
b = np.arange(10, dtype=np.int32)

product(a, b)

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [50]:
a = np.random.random(10) # Numpy arrays contain double by default
b = np.random.random(10)
try:
    product(a, b)
except TypeError as e:
    print("TypeError:",e)

TypeError: No matching definition for argument type(s) array(float64, 1d, C), array(float64, 1d, C)


## Numba types
```C
void,
intp, uintp,
intc, uintc,
int8, uint8, int16, uint16, int32, uint32, int64, uint64,
float32, float64,
complex64, complex128.
```
### Arrays
```C
float32[:] 
float64[:, :]
```

## Numba flags

### nopython
Compilation fails if you use pure Python objects.
### nogil
Release Global Interpreter Lock (GIL). Enable this option when you use threads.
### cache
Do not recompile the function.

# Inlining


In [55]:
import math
from numba import njit

@njit
def square(x):
    return x ** 2

@njit
def hypot(x, y):
    return math.sqrt(square(x) + square(y))

In [56]:
hypot(2., 3.)

3.605551275463989

In [58]:
for k, v in hypot.inspect_asm().items():
    print(v)

	.section	__TEXT,__text,regular,pure_instructions
	.macosx_version_min 10, 11
	.globl	__ZN8__main__10hypot$2424Edd
	.p2align	4, 0x90
__ZN8__main__10hypot$2424Edd:
	vmulsd	%xmm0, %xmm0, %xmm0
	vmulsd	%xmm1, %xmm1, %xmm1
	vaddsd	%xmm1, %xmm0, %xmm0
	vsqrtsd	%xmm0, %xmm0, %xmm0
	vmovsd	%xmm0, (%rdi)
	xorl	%eax, %eax
	retq

	.globl	__ZN7cpython8__main__10hypot$2424Edd
	.p2align	4, 0x90
__ZN7cpython8__main__10hypot$2424Edd:
	.cfi_startproc
	pushq	%r15
Lcfi0:
	.cfi_def_cfa_offset 16
	pushq	%r14
Lcfi1:
	.cfi_def_cfa_offset 24
	pushq	%r13
Lcfi2:
	.cfi_def_cfa_offset 32
	pushq	%r12
Lcfi3:
	.cfi_def_cfa_offset 40
	pushq	%rbx
Lcfi4:
	.cfi_def_cfa_offset 48
	subq	$32, %rsp
Lcfi5:
	.cfi_def_cfa_offset 80
Lcfi6:
	.cfi_offset %rbx, -48
Lcfi7:
	.cfi_offset %r12, -40
Lcfi8:
	.cfi_offset %r13, -32
Lcfi9:
	.cfi_offset %r14, -24
Lcfi10:
	.cfi_offset %r15, -16
	movq	%rdi, %rbx
	movabsq	$_.const.hypot, %r10
	movabsq	$_PyArg_UnpackTuple, %r11
	leaq	24(%rsp), %r8
	leaq	16(%rsp), %r9
	movl	$2, %edx
	movl	$2, %