In [1]:
import numpy as np

# Why numpy?

In [2]:
numbers = list(range(10 ** 4))

numbers[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [3]:
def my_first_sum(numbers):
    """Add up the elements of ``numbers`` using explicit index lookups.

    Parameters
    ----------
    numbers : sequence
        Any object supporting ``len()`` and integer indexing.

    Returns
    -------
    The accumulated total, starting from the integer 0 (so an empty
    sequence yields 0).
    """
    # Named `total` rather than `sum` so the built-in `sum`, which is
    # benchmarked separately in this notebook, is not shadowed.
    total = 0
    # Index-based access is kept on purpose: this variant is the one being
    # timed against direct iteration.
    for i in range(len(numbers)):
        total += numbers[i]
    return total

%timeit my_first_sum(numbers)

753 µs ± 8.74 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [4]:
def my_second_sum(numbers):
    """Add up the elements of ``numbers`` by iterating over them directly.

    Parameters
    ----------
    numbers : iterable
        Any iterable of values that support ``+``.

    Returns
    -------
    The accumulated total, starting from the integer 0 (so an empty
    iterable yields 0).
    """
    # Named `total` rather than `sum` so the built-in `sum`, which is
    # benchmarked separately in this notebook, is not shadowed.
    total = 0
    for number in numbers:
        total += number
    return total

%timeit my_second_sum(numbers)

439 µs ± 4.32 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [5]:
%timeit sum(numbers)

72.7 µs ± 801 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
print(my_second_sum, type(my_second_sum))
print(sum, type(sum))

<function my_second_sum at 0x7f10788bc440> <class 'function'>
<built-in function sum> <class 'builtin_function_or_method'>


In [7]:
numbers_numpy = np.array(numbers)

%timeit numbers_numpy.sum()

11 µs ± 157 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


# Array creation

In [8]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
a = np.arange(15).reshape((3, 5))
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [10]:
a.shape, a.dtype

((3, 5), dtype('int64'))

In [11]:
identity = np.eye(5)
identity

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [12]:
np.ones_like(identity)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [13]:
np.random.uniform()

0.27166214739441985

In [14]:
randoms = np.random.uniform(size=(3, 3))
randoms

array([[0.34972786, 0.92642685, 0.14539332],
       [0.84174722, 0.68578826, 0.66688913],
       [0.62929744, 0.51479979, 0.45743071]])

In [15]:
randoms.dtype

dtype('float64')

# Basic Operations

In [16]:
a = np.array([20, 30, 40, 50])
b = np.arange(4)

a - b

array([20, 29, 38, 47])

In [17]:
b ** 2

array([0, 1, 4, 9])

In [18]:
10 * np.sin(a)

array([ 9.12945251, -9.88031624,  7.4511316 , -2.62374854])

In [19]:
a < 35

array([ True,  True, False, False])

In [20]:
A = np.array([
    [1., 1.],
    [0., 1.],
])

B = np.array([
    [2, 0],
    [3, 4],
])

A.dtype, B.dtype

(dtype('float64'), dtype('int64'))

In [21]:
A * B

array([[2., 0.],
       [0., 4.]])

In [22]:
A.dot(B)

array([[5., 4.],
       [3., 4.]])

In [23]:
np.dot(A, B)

array([[5., 4.],
       [3., 4.]])

In [24]:
A @ B

array([[5., 4.],
       [3., 4.]])

In [25]:
A += 1
A

array([[2., 2.],
       [1., 2.]])

In [26]:
randoms = np.random.uniform(size=(3, 3))
randoms

array([[0.94790305, 0.64476608, 0.5292369 ],
       [0.50547859, 0.95079537, 0.60407707],
       [0.02404521, 0.04311794, 0.78408657]])

In [27]:
randoms.sum()

5.033506770623625

In [28]:
randoms.min(), randoms.mean(), randoms.max()

(0.024045205699700034, 0.5592785300692917, 0.9507953661937781)

# Operations over axes

In [29]:
# 3x4 matrix of float64 values 0..11. The builtin `float` is used as dtype:
# the `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24,
# where it raises AttributeError.
M = np.arange(12, dtype=float).reshape((3, 4))
M

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [30]:
M.sum(axis=0)

array([12., 15., 18., 21.])

In [31]:
M.sum(axis=1)

array([ 6., 22., 38.])

In [32]:
# 3x4x5 array of float64 values 0..59. The builtin `float` replaces the
# `np.float` alias, which was deprecated in NumPy 1.20 and removed in 1.24.
P = np.arange(3 * 4 * 5, dtype=float).reshape((3, 4, 5))
P

array([[[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.]],

       [[20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34.],
        [35., 36., 37., 38., 39.]],

       [[40., 41., 42., 43., 44.],
        [45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54.],
        [55., 56., 57., 58., 59.]]])

In [33]:
P.min(axis=0)

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., 19.]])

In [34]:
P.min(axis=1)

array([[ 0.,  1.,  2.,  3.,  4.],
       [20., 21., 22., 23., 24.],
       [40., 41., 42., 43., 44.]])

In [35]:
P.min(axis=2)

array([[ 0.,  5., 10., 15.],
       [20., 25., 30., 35.],
       [40., 45., 50., 55.]])

In [36]:
P.min(axis=-1)

array([[ 0.,  5., 10., 15.],
       [20., 25., 30., 35.],
       [40., 45., 50., 55.]])

In [37]:
P.min(axis=(0, 1))

array([0., 1., 2., 3., 4.])

# Indexing and Slicing

In [38]:
a = np.arange(10) ** 3
a

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [39]:
a[2]

8

In [40]:
a[2:5]

array([ 8, 27, 64])

In [41]:
a[:5]

array([ 0,  1,  8, 27, 64])

In [42]:
a[-3:]

array([343, 512, 729])

In [43]:
a[::2]

array([  0,   8,  64, 216, 512])

In [44]:
a[::-1]

array([729, 512, 343, 216, 125,  64,  27,   8,   1,   0])

In [45]:
b = a[::2]
b[-1] = -1
b

array([  0,   8,  64, 216,  -1])

In [46]:
a

array([  0,   1,   8,  27,  64, 125, 216, 343,  -1, 729])

In [47]:
a[:3] = -10

In [48]:
a

array([-10, -10, -10,  27,  64, 125, 216, 343,  -1, 729])

In [49]:
# Fresh 3x4x5 float64 array (values 0..59) for the multi-dimensional slicing
# examples. `dtype=float` replaces `np.float`, an alias deprecated in
# NumPy 1.20 and removed in 1.24.
P = np.arange(3 * 4 * 5, dtype=float).reshape((3, 4, 5))
P

array([[[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.]],

       [[20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34.],
        [35., 36., 37., 38., 39.]],

       [[40., 41., 42., 43., 44.],
        [45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54.],
        [55., 56., 57., 58., 59.]]])

In [50]:
P[0, :, :]

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., 19.]])

In [51]:
P[0]

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., 19.]])

In [52]:
P[0, :, -1]

array([ 4.,  9., 14., 19.])

In [53]:
P[0, :, 2:]

array([[ 2.,  3.,  4.],
       [ 7.,  8.,  9.],
       [12., 13., 14.],
       [17., 18., 19.]])

In [54]:
P[..., 0]

array([[ 0.,  5., 10., 15.],
       [20., 25., 30., 35.],
       [40., 45., 50., 55.]])

In [55]:
P[0, ..., 0]

array([ 0.,  5., 10., 15.])

In [56]:
P

array([[[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.]],

       [[20., 21., 22., 23., 24.],
        [25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34.],
        [35., 36., 37., 38., 39.]],

       [[40., 41., 42., 43., 44.],
        [45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54.],
        [55., 56., 57., 58., 59.]]])

In [57]:
for p in P:
    print(p.min())

0.0
20.0
40.0


# Fancy Indexing

In [58]:
a = np.arange(10) ** 3
a

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [59]:
is_even = a % 2 == 0
is_even

array([ True, False,  True, False,  True, False,  True, False,  True,
       False])

In [60]:
a[is_even]

array([  0,   8,  64, 216, 512])

In [61]:
b = a.copy()
b[is_even] = -1
b

array([ -1,   1,  -1,  27,  -1, 125,  -1, 343,  -1, 729])

In [62]:
indexer = [0, 1, 4]
a[indexer]

array([ 0,  1, 64])

In [63]:
c = a.copy()
c[indexer] = -1
c

array([ -1,  -1,   8,  27,  -1, 125, 216, 343, 512, 729])

In [64]:
a[[2, 3, 2, 2]]

array([ 8, 27,  8,  8])

In [65]:
# 3x4 float64 matrix (values 0..11) used for the 2-D fancy-indexing examples.
# `dtype=float` replaces `np.float`, an alias deprecated in NumPy 1.20 and
# removed in 1.24.
M = np.arange(12, dtype=float).reshape((3, 4))
M

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [66]:
is_even = M % 2 == 0
is_even

array([[ True, False,  True, False],
       [ True, False,  True, False],
       [ True, False,  True, False]])

In [67]:
M[is_even]

array([ 0.,  2.,  4.,  6.,  8., 10.])

In [68]:
is_all_even_column = is_even.all(axis=0)
is_all_even_column

array([ True, False,  True, False])

In [69]:
M[:, is_all_even_column]

array([[ 0.,  2.],
       [ 4.,  6.],
       [ 8., 10.]])

In [70]:
M[:, [0, 2]]

array([[ 0.,  2.],
       [ 4.,  6.],
       [ 8., 10.]])

In [71]:
M[1, is_all_even_column]

array([4., 6.])

In [72]:
M[[0, 2], [1, 3]]

array([ 1., 11.])

In [73]:
M[[0, 2]][:, [1, 3]]

array([[ 1.,  3.],
       [ 9., 11.]])

In [74]:
# np.meshgrid returns a *list* of index arrays. A multi-dimensional index
# must be a tuple: indexing with a list of arrays was deprecated (it emitted
# a FutureWarning) and newer NumPy treats the list as one array index, which
# no longer selects the rows x columns sub-grid.
M[tuple(np.meshgrid([0, 2], [1, 3], indexing='ij'))]

  """Entry point for launching an IPython kernel.


array([[ 1.,  3.],
       [ 9., 11.]])

In [75]:
np.meshgrid([0, 2], [1, 3], indexing='ij')

[array([[0, 0],
        [2, 2]]), array([[1, 3],
        [1, 3]])]

# Broadcasting

In [76]:
# 3x4 float64 matrix (values 0..11) used for the broadcasting examples.
# `dtype=float` replaces `np.float`, an alias deprecated in NumPy 1.20 and
# removed in 1.24.
M = np.arange(12, dtype=float).reshape((3, 4))
M

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [77]:
M.T

array([[ 0.,  4.,  8.],
       [ 1.,  5.,  9.],
       [ 2.,  6., 10.],
       [ 3.,  7., 11.]])

In [78]:
M + M.T

ValueError: operands could not be broadcast together with shapes (3,4) (4,3) 

In [79]:
column_factors = np.array([1, 2, 10, 4])
row_factors = np.array([3, 0, -1])

for a in [M, column_factors, row_factors]:
    print(a.shape)

(3, 4)
(4,)
(3,)


In [80]:
M * column_factors

array([[  0.,   2.,  20.,  12.],
       [  4.,  10.,  60.,  28.],
       [  8.,  18., 100.,  44.]])

In [81]:
M * row_factors

ValueError: operands could not be broadcast together with shapes (3,4) (3,) 

In [82]:
row_factors[:, None].shape

(3, 1)

In [83]:
M * row_factors[:, None]

array([[  0.,   3.,   6.,   9.],
       [  0.,   0.,   0.,   0.],
       [ -8.,  -9., -10., -11.]])

In [84]:
2 * M

array([[ 0.,  2.,  4.,  6.],
       [ 8., 10., 12., 14.],
       [16., 18., 20., 22.]])

In [85]:
matrix_factors = np.array([2, 10])
matrix_factors[:, None, None] * M

array([[[  0.,   2.,   4.,   6.],
        [  8.,  10.,  12.,  14.],
        [ 16.,  18.,  20.,  22.]],

       [[  0.,  10.,  20.,  30.],
        [ 40.,  50.,  60.,  70.],
        [ 80.,  90., 100., 110.]]])

Broadcasting rule
---
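When operating on two arrays, NumPy compares their shapes dimension by dimension.
It starts with the trailing (rightmost) dimensions and works its way to the left. Two dimensions are compatible when

  1. they are equal, or
  2. one of them is 1

If one array has fewer dimensions, its shape is treated as if it were padded with 1s on the left. That is why `(3, 4) * (4,)` works, while `(3, 4) * (3,)` fails until `row_factors[:, None]` turns the second shape into `(3, 1)`.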

# Finding help

In [86]:
np.sum?

[0;31mSignature:[0m
[0mnp[0m[0;34m.[0m[0msum[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0ma[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mout[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeepdims[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minitial[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mwhere[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Sum of array elements over a given axis.

Parameters
----------
a : array_like
    Elements to sum.
axis : None or int or tuple of ints, optional
    Axis or axes along which a sum is performed.  The default,
    axis=None, wi

In [87]:
np.sum??

[0;31mSignature:[0m
[0mnp[0m[0;34m.[0m[0msum[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0ma[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mout[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeepdims[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minitial[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mwhere[0m[0;34m=[0m[0;34m<[0m[0mno[0m [0mvalue[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;34m@[0m[0marray_function_dispatch[0m[0;34m([0m[0m_sum_dispatcher[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;32mdef[0m [0msum[0m[0;34m([0m[0ma[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m 

In [88]:
help(np.sum)

Help on function sum in module numpy:

sum(a, axis=None, dtype=None, out=None, keepdims=<no value>, initial=<no value>, where=<no value>)
    Sum of array elements over a given axis.
    
    Parameters
    ----------
    a : array_like
        Elements to sum.
    axis : None or int or tuple of ints, optional
        Axis or axes along which a sum is performed.  The default,
        axis=None, will sum all of the elements of the input array.  If
        axis is negative it counts from the last to the first axis.
    
        .. versionadded:: 1.7.0
    
        If axis is a tuple of ints, a sum is performed on all of the axes
        specified in the tuple instead of a single axis or all the axes as
        before.
    dtype : dtype, optional
        The type of the returned array and of the accumulator in which the
        elements are summed.  The dtype of `a` is used by default unless `a`
        has an integer dtype of less precision than the default platform
        integer.  In 

In [89]:
np.lookfor('cumulative sum')

Search results for 'cumulative sum'
-----------------------------------
numpy.cumsum
    Return the cumulative sum of the elements along a given axis.
numpy.nancumsum
    Return the cumulative sum of array elements over a given axis treating Not a
numpy.ma.cumsum
    Return the cumulative sum of the array elements over the given axis.
numpy.chararray.cumsum
    Return the cumulative sum of the elements along the given axis.
numpy.ma.MaskedArray.cumsum
    Return the cumulative sum of the array elements over the given axis.
numpy.core._umath_tests.cumsum
    Cumulative sum of the input (n)->(n)
numpy.sum
    Sum of array elements over a given axis.
numpy.ufunc.reduce
    Reduces `a`'s dimension by one, by applying ufunc along one axis.
numpy.random.Generator.f
    Draw samples from an F distribution.
numpy.random.RandomState.f
    Draw samples from an F distribution.
numpy.random.Generator.binomial
    Draw samples from a binomial distribution.
numpy.random.Generator.logistic
    Draw s

In [90]:
np.show_config()

blas_mkl_info:
  NOT AVAILABLE
blis_info:
  NOT AVAILABLE
openblas_info:
  NOT AVAILABLE
atlas_3_10_blas_threads_info:
  NOT AVAILABLE
atlas_3_10_blas_info:
  NOT AVAILABLE
atlas_blas_threads_info:
  NOT AVAILABLE
atlas_blas_info:
  NOT AVAILABLE
accelerate_info:
  NOT AVAILABLE
blas_info:
    libraries = ['cblas', 'blas', 'blas']
    library_dirs = ['/usr/lib64']
    include_dirs = ['/usr/local/include', '/usr/include']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
blas_opt_info:
    define_macros = [('NO_ATLAS_INFO', 1), ('HAVE_CBLAS', None)]
    libraries = ['cblas', 'blas', 'blas']
    library_dirs = ['/usr/lib64']
    include_dirs = ['/usr/local/include', '/usr/include']
    language = c
lapack_mkl_info:
  NOT AVAILABLE
openblas_lapack_info:
  NOT AVAILABLE
openblas_clapack_info:
  NOT AVAILABLE
flame_info:
  NOT AVAILABLE
atlas_3_10_threads_info:
  NOT AVAILABLE
atlas_3_10_info:
  NOT AVAILABLE
atlas_threads_info:
  NOT AVAILABLE
atlas_info:
  NOT AVAILABLE
lapack_i