<img src="https://upload.wikimedia.org/wikipedia/commons/1/1a/NumPy_logo.svg" width="300" align="left"/>

* [NUMPY: 1. Creating Arrays](#1)
* [NUMPY: 2. Query & Load/Save](#2)
* [NUMPY: 3. Universal Functions](#3)

In [98]:
from IPython.display import IFrame

<a id='1'></a>

# NUMPY: 1. Creating Arrays

In [99]:
IFrame(src='https://www.youtube.com/embed/YpBUiEsTiEA', width=640, height=400)

In [100]:
import numpy as np # np is a commonly used shorthand

# Type (Python Standard Library)

In [101]:
print( type(1), type(1.0), type('a string'), type(True) )

<class 'int'> <class 'float'> <class 'str'> <class 'bool'>


# List (Python Standard Library)

In [102]:
my_list = [1, 1.0, False, 'a string', True]

start:stop:step notation

In [103]:
print( my_list[0], my_list[-1], my_list[:2], my_list[-2:], my_list[::2], my_list[::-1] )

1 True [1, 1.0] ['a string', True] [1, False, True] [True, 'a string', False, 1.0, 1]


In [104]:
[ i+j for i in 'abc' for j in 'abc' ]

['aa', 'ab', 'ac', 'ba', 'bb', 'bc', 'ca', 'cb', 'cc']

In [105]:
['{:d} * {:d} = {:d}'.format(i,j,i*j) for i in [1,2,3] for j in [4,5]]

['1 * 4 = 4',
 '1 * 5 = 5',
 '2 * 4 = 8',
 '2 * 5 = 10',
 '3 * 4 = 12',
 '3 * 5 = 15']

In [106]:
[ 1, 2, 3, 4 ] * 3

[1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]

# Range (Python Standard Library)

In [107]:
l = [0,1,2]
for i in l:
    print(i)
r = range(3)
for i in r:
    print(i)

0
1
2
0
1
2


In [108]:
l

[0, 1, 2]

In [109]:
r

range(0, 3)

In [110]:
l == list(r)

True

In [111]:
[ i for i in range(100) if (i%7)==0 ]

[0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98]

# Len (Python Standard Library)

In [112]:
len([1, 1.0, False, 'a string', True])

5

In [113]:
len(range(10))

10

# Functions

In [114]:
def divideMax(l):
    """Divides the items of a list of numbers by the maximum value
    
    Parameters
    ----------
    l : a list of numerical values
    
    Returns
    ----------
    out : a list of numerical values
    
    """
    return [ i/max(l) for i in l ]

In [115]:
divideMax?

In [116]:
divideMax( list(range(5)) )

[0.0, 0.25, 0.5, 0.75, 1.0]

In [117]:
divideMax_lambda = lambda x : [ i/max(x) for i in x ]

In [118]:
divideMax_lambda( list(range(5)) )

[0.0, 0.25, 0.5, 0.75, 1.0]

In [119]:
type(divideMax)

function

In [120]:
type(divideMax_lambda)

function

In [121]:
divideMax_lambda.__name__

'<lambda>'

In [122]:
divideMax_lambda.__name__ = 'divideMax'

In [123]:
divideMax_lambda.__name__

'divideMax'

In [124]:
divideMax.__name__

'divideMax'

# NumPy Arrays

### Creating Arrays

In [125]:
array = np.zeros(shape=(2,2))
array

array([[0., 0.],
       [0., 0.]])

In [126]:
array = np.ones(shape=(5,))
array

array([1., 1., 1., 1., 1.])

In [127]:
array = np.empty(shape=(2,4,3))
array

array([[[-1.28822975e-231, -1.28822975e-231,  6.42285340e-323],
        [ 0.00000000e+000,  2.12199579e-314,  5.30276956e+180],
        [ 5.05117710e-038,  4.57046137e-071,  4.46548998e-086],
        [ 3.35959164e-143,  6.01433264e+175,  6.93885958e+218]],

       [[ 5.56218858e+180,  3.94356143e+180,  6.70022249e+170],
        [ 9.95166985e-043,  1.45970129e+185,  5.99082611e-066],
        [ 6.14665997e-144,  1.50008929e+248, -3.00254942e-229],
        [ 2.00390022e+000, -1.28822975e-231, -3.11109135e+231]]])

In [128]:
array = np.linspace(start=0,stop=10,num=5)
array

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [129]:
array = np.arange(start=0,stop=10,step=2.5)
array

array([0. , 2.5, 5. , 7.5])

In [130]:
array = np.arange(start=0,stop=10,step=2.5,dtype='complex64')
array

array([0. +0.j, 2.5+0.j, 5. +0.j, 7.5+0.j], dtype=complex64)

### Arguments

In [131]:
array = np.empty(shape=(2,4,3))

In [132]:
array.ndim

3

In [133]:
array.shape

(2, 4, 3)

In [134]:
array.size

24

In [135]:
array.dtype

dtype('float64')

<a id='2'></a>

# NUMPY: 2. Query & Load/Save

In [136]:
IFrame(src='https://www.youtube.com/embed/2xJsNi3wk-s', width=640, height=400)

### Reshape

In [137]:
array1d = np.arange(start=0,stop=22,step=2.5)
array1d.shape

(9,)

In [138]:
array2d = array1d.reshape( (3,3) )
array2d.shape

(3, 3)

### Indexing and Slicing

start:stop:step notation

In [139]:
print( array1d[0], array1d[-1], array1d[:2], array1d[-2:], array1d[::2], array1d[::-1] )

0.0 20.0 [0.  2.5] [17.5 20. ] [ 0.  5. 10. 15. 20.] [20.  17.5 15.  12.5 10.   7.5  5.   2.5  0. ]


In [140]:
array2d

array([[ 0. ,  2.5,  5. ],
       [ 7.5, 10. , 12.5],
       [15. , 17.5, 20. ]])

In [141]:
array2d[0] # first row

array([0. , 2.5, 5. ])

In [142]:
array2d[0,1] # first row, second column

2.5

In [143]:
array2d[::-1] # reversed order of the rows

array([[15. , 17.5, 20. ],
       [ 7.5, 10. , 12.5],
       [ 0. ,  2.5,  5. ]])

In [144]:
array2d[:,::-1] # reversed order of the columns

array([[ 5. ,  2.5,  0. ],
       [12.5, 10. ,  7.5],
       [20. , 17.5, 15. ]])

In [145]:
array1d

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. ])

In [146]:
print(array1d.shape, array1d.ndim)

(9,) 1


In [147]:
print( array1d[np.newaxis,:].shape, array1d[np.newaxis,:].ndim )

(1, 9) 2


In [148]:
print( array1d[:,np.newaxis].shape, array1d[:,np.newaxis].ndim )

(9, 1) 2


In [149]:
array1d[ [0,3,4] ]

array([ 0. ,  7.5, 10. ])

In [150]:
array1d[ [True,False,False,True,True,False,False,False,False] ]

array([ 0. ,  7.5, 10. ])

### Fancy Indexing

In [151]:
array2d

array([[ 0. ,  2.5,  5. ],
       [ 7.5, 10. , 12.5],
       [15. , 17.5, 20. ]])

In [152]:
array2d[[0,1,2],[0,1,2]]

array([ 0., 10., 20.])

In [153]:
array2d[range(3),range(3)]

array([ 0., 10., 20.])

### Boolean Masking

In [154]:
array1d

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. ])

In [155]:
array1d[ [True,False,False,True,True,False,False,False,False] ]

array([ 0. ,  7.5, 10. ])

In [156]:
array2d[ array2d % 10 == 0 ]

array([ 0., 10., 20.])

### Loading and Saving

In [157]:
!echo "# some integers\n@ in two columns\n0 1\n1 2\n2 3\n3 4\n4 5\n" > data/values.txt
!head data/values.txt

# some integers
@ in two columns
0 1
1 2
2 3
3 4
4 5



In [158]:
a = np.loadtxt('data/values.txt',comments=('#','@'),dtype=int)
a

array([[0, 1],
       [1, 2],
       [2, 3],
       [3, 4],
       [4, 5]])

In [159]:
a = np.loadtxt('data/values.txt',skiprows=2)
a

array([[0., 1.],
       [1., 2.],
       [2., 3.],
       [3., 4.],
       [4., 5.]])

In [160]:
a,b = np.loadtxt('data/values.txt',skiprows=2,unpack=True)
print(a)
print(b)

[0. 1. 2. 3. 4.]
[1. 2. 3. 4. 5.]


In [161]:
!echo "# 3 columns\n@ of integers\n0|1|2\n1|2|3\n2|3|4\n3|4|5\n4|5|6\n" > data/values.txt
!head data/values.txt

# 3 columns
@ of integers
0|1|2
1|2|3
2|3|4
3|4|5
4|5|6



In [162]:
a,b,c = np.loadtxt('data/values.txt',skiprows=2,unpack=True,delimiter='|')
print(a)
print(b)
print(c)

[0. 1. 2. 3. 4.]
[1. 2. 3. 4. 5.]
[2. 3. 4. 5. 6.]


In [163]:
np.savetxt('data/values.txt',np.c_[a,b,c],fmt='%1.2f',delimiter='...')
!head data/values.txt

0.00...1.00...2.00
1.00...2.00...3.00
2.00...3.00...4.00
3.00...4.00...5.00
4.00...5.00...6.00


In [164]:
np.c_[a,b,c]

array([[0., 1., 2.],
       [1., 2., 3.],
       [2., 3., 4.],
       [3., 4., 5.],
       [4., 5., 6.]])

### Stack

In [165]:
np.column_stack( (a,b,c) )

array([[0., 1., 2.],
       [1., 2., 3.],
       [2., 3., 4.],
       [3., 4., 5.],
       [4., 5., 6.]])

In [166]:
array = np.stack( (a,b,c) ).T

### Split

In [167]:
a,b,c = np.split(array,3,axis=1)
print(a)
print(b)
print(c)

[[0.]
 [1.]
 [2.]
 [3.]
 [4.]]
[[1.]
 [2.]
 [3.]
 [4.]
 [5.]]
[[2.]
 [3.]
 [4.]
 [5.]
 [6.]]


In [168]:
a,b,c = np.split(array,[1,2],axis=1)
print(a)
print(b)
print(c)

[[0.]
 [1.]
 [2.]
 [3.]
 [4.]]
[[1.]
 [2.]
 [3.]
 [4.]
 [5.]]
[[2.]
 [3.]
 [4.]
 [5.]
 [6.]]


In [169]:
a,b,c,d,e = np.split(array,5,axis=0)
print(a)
print(b)
print(c)
print(d)
print(e)

[[0. 1. 2.]]
[[1. 2. 3.]]
[[2. 3. 4.]]
[[3. 4. 5.]]
[[4. 5. 6.]]


In [170]:
a,b,c,d,e = np.split(array,[1,2,3,4],axis=0)
print(a)
print(b)
print(c)
print(d)
print(e)

[[0. 1. 2.]]
[[1. 2. 3.]]
[[2. 3. 4.]]
[[3. 4. 5.]]
[[4. 5. 6.]]


<a id='3'></a>

# NUMPY: 3. Universal Functions

In [171]:
IFrame(src='https://www.youtube.com/embed/469ukhzwEPg', width=640, height=400)

Looping over arrays to operate on each element is very slow.<br>
For faster execution, vectorized operations are implemented as Numpy universal functions (ufuncs), e.g.:

|        operator       | equivalent ufunc |
|:---------------------:|:----------------:|
|           +           |     `np.add`     |
|           -           |   `np.subtract`  |
|           *           |   `np.multiply`  |
|           /           |   `np.divide`  |
|           **          |    `np.power`    |
|           %           |     `np.mod`     |

* comparison operators: `==`, `!=`, `>`, `<`, `>=`, `<=`;
* bitwise operators: `&`, `|`, `>>`, `<<`, `~`, `^`;
* trigonometric functions: `np.cos`, `np.sin`, `np.tan`, `np.arcsin`, etc.;
* exponential functions: `np.exp`, `np.expm1`;
* logarithmic functions: `np.log`, `np.log10`, `np.log1p`.

### Aggregation Functions
`np.min`, `np.max`, `np.sum`, `np.mean`, `np.std`, `np.argmax`, `np.argmin`, etc.

Let's estimate the value this series converges to:
\begin{equation*}
\lim_{n\rightarrow\infty} \sum_{k=0}^n \frac{1}{k^2-1} = \frac{3}{4} = 0.75
\end{equation*}
We are going to time the execution of:
* a Python for loop;
* a Fortran for loop;
* vectorized operations using NumPy.

In [172]:
def func_loop(n):
    result = 0
    for k in range(2,int(n)):
        result += 1./(k**2 - 1)
    return result

In [173]:
def func_ufunc(n):
    return ( 1. / (np.arange(2.,n)**2. - 1.) ).sum()

In [174]:
!pip install fortran-magic &> /dev/null # requires gcc from conda-forge

In [175]:
%load_ext fortranmagic

The fortranmagic extension is already loaded. To reload it, use:
  %reload_ext fortranmagic


In [176]:
%%fortran

subroutine func_fortran(n,result)
    integer, intent(in) :: n
    double precision, intent(out) :: result
    integer :: k
    result = 0
    do k = 2, n
        result = result + 1. / ( k*k - 1 )
    end do
end subroutine

In [177]:
print( func_loop(1e4), func_ufunc(1e4), func_fortran(1e4) )

0.7498999949995057 0.7498999949994998 0.7499000227319081


In [178]:
%%timeit
func_loop(1e4)

3.5 ms ± 258 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [179]:
%%timeit
func_fortran(1e4)

12.8 µs ± 447 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [180]:
%%timeit
func_ufunc(1e4)

33.3 µs ± 4.04 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Broadcasting

In [181]:
a = np.arange(0,4,1)
b = np.arange(4,8,1)
print(a)
print(b)

[0 1 2 3]
[4 5 6 7]


In [182]:
a + b

array([ 4,  6,  8, 10])

In [183]:
a = np.arange(0,5,1)
a[np.newaxis,:]

array([[0, 1, 2, 3, 4]])

In [184]:
b[:,np.newaxis]

array([[4],
       [5],
       [6],
       [7]])

In [185]:
a[np.newaxis,:] + b[:,np.newaxis]

array([[ 4,  5,  6,  7,  8],
       [ 5,  6,  7,  8,  9],
       [ 6,  7,  8,  9, 10],
       [ 7,  8,  9, 10, 11]])

In [186]:
np.vstack( (a[np.newaxis,:],a[np.newaxis,:],a[np.newaxis,:],a[np.newaxis,:]) )

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [187]:
np.hstack( (b[:,np.newaxis],b[:,np.newaxis],b[:,np.newaxis],
           b[:,np.newaxis],b[:,np.newaxis]) )

array([[4, 4, 4, 4, 4],
       [5, 5, 5, 5, 5],
       [6, 6, 6, 6, 6],
       [7, 7, 7, 7, 7]])

In [188]:
b[:,np.newaxis] * 3

array([[12],
       [15],
       [18],
       [21]])

In [189]:
( a[np.newaxis,:] + b[:,np.newaxis] ) / b[:,np.newaxis]

array([[1.        , 1.25      , 1.5       , 1.75      , 2.        ],
       [1.        , 1.2       , 1.4       , 1.6       , 1.8       ],
       [1.        , 1.16666667, 1.33333333, 1.5       , 1.66666667],
       [1.        , 1.14285714, 1.28571429, 1.42857143, 1.57142857]])

In [190]:
np.ones( (3,4) ).shape

(3, 4)

In [191]:
np.ones(4)[np.newaxis,:].shape

(1, 4)

In [192]:
np.ones(4)[np.newaxis,:] + np.ones( (3,4) )

array([[2., 2., 2., 2.],
       [2., 2., 2., 2.],
       [2., 2., 2., 2.]])

In [193]:
np.ones(4)[:,np.newaxis].shape

(4, 1)

In [194]:
np.ones(4)[:,np.newaxis] + np.ones( (3,4) )

ValueError: operands could not be broadcast together with shapes (4,1) (3,4) 