## NumPy for Numerical Analysis
Numeric Python (numpy.org)

In [2]:
# pip install numpy
# pip3 install numpy
# python -m pip install numpy
# python3 -m pip install numpy
# py -m pip install numpy
# !pip install numpy # if you really have to
import numpy as np

In [3]:
dir(np)

['ALLOW_THREADS',
 'AxisError',
 'BUFSIZE',
 'CLIP',
 'DataSource',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'RAISE',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'WRAP',
 '_CopyMode',
 '_NoValue',
 '_UFUNC_API',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__deprecated_attrs__',
 '__dir__',
 '__doc__',
 '__expired_functions__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_add_newdoc_ufunc',
 '_distributor_init',
 '_financial_names',
 

We find a similar function to range called arange


In [4]:
help(np.arange)

Help on built-in function arange in module numpy:

arange(...)
    arange([start,] stop[, step,], dtype=None, *, like=None)
    
    Return evenly spaced values within a given interval.
    
    Values are generated within the half-open interval ``[start, stop)``
    (in other words, the interval including `start` but excluding `stop`).
    For integer arguments the function is equivalent to the Python built-in
    `range` function, but returns an ndarray rather than a list.
    
    When using a non-integer step, such as 0.1, it is often better to use
    
    Parameters
    ----------
    start : integer or real, optional
        Start of interval.  The interval includes this value.  The default
        start value is 0.
    stop : integer or real
        End of interval.  The interval does not include this value, except
        in some cases where `step` is not an integer and floating point
        round-off affects the length of `out`.
    step : integer or real, optional
        S

In [5]:
an_array = np.arange(10)
print(an_array)
print(type(an_array))

[0 1 2 3 4 5 6 7 8 9]
<class 'numpy.ndarray'>


In [6]:
python_list = [i for i in range(10000)]
numpy_array = np.arange(10000)

In [7]:
sum(python_list)

49995000

In [8]:
np.sum(numpy_array)

49995000

The key feature of NumPy is the performance benefit coming with the arrays compared to standard Python lists. We can measure the performance difference by using timeit module.

In [9]:
import timeit
dir(timeit)

['Timer',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_globals',
 'default_number',
 'default_repeat',
 'default_timer',
 'dummy_src_name',
 'gc',
 'itertools',
 'main',
 'reindent',
 'repeat',
 'sys',
 'template',
 'time',
 'timeit']

Compare standard python with NumPy

In [10]:
help(timeit.timeit)

Help on function timeit in module timeit:

timeit(stmt='pass', setup='pass', timer=<built-in function perf_counter>, number=1000000, globals=None)
    Convenience function to create Timer object and call timeit method.



In [20]:
# standard Python

standard_statement = \
'''
s = 0
for i in range(10000):
    s += i
'''
# standard Python with list comprehension
standard_plus_statement = \
'''
n = [i for i in range(10000)]
s = sum(n)
'''
# Numpy array
numpy_statement = \
'''
import numpy as np
n = np.arange(10000)
s = np.sum(n)
'''

In [24]:
timeit.timeit(standard_statement, number=1000)

0.37482409994117916

In [25]:
timeit.timeit(standard_plus_statement, number=1000)

0.3088127000955865

In [26]:
timeit.timeit(numpy_statement, number=1000)

0.021691299974918365

Where does this performance improvement come from? It is something about the way they use the memory.

In [27]:
n_standard = []
print(f"{hex(id(n_standard))} << Initial Memory Location")
for i in range(5):
    n_standard.append(i)
    print(f"{hex(id(n_standard))}", end=':')
    print(f"{[hex(id(x)) for x in n_standard]}")

0x1f66e0b5500 << Initial Memory Location
0x1f66e0b5500:['0x1f6687200d0']
0x1f66e0b5500:['0x1f6687200d0', '0x1f6687200f0']
0x1f66e0b5500:['0x1f6687200d0', '0x1f6687200f0', '0x1f668720110']
0x1f66e0b5500:['0x1f6687200d0', '0x1f6687200f0', '0x1f668720110', '0x1f668720130']
0x1f66e0b5500:['0x1f6687200d0', '0x1f6687200f0', '0x1f668720110', '0x1f668720130', '0x1f668720150']


In [30]:
n = n_numpy = np.array([])
print(f"{hex(id(n_numpy))} << Initial Memory Location")
for i in range(5):
    n_numpy = np.append(n_numpy, np.array(i))
    print(f"{hex(id(n_numpy))}", end=':')
    print(f"{[hex(id(x)) for x in n_numpy]}")

0x1f66ebb77b0 << Initial Memory Location
0x1f66ebb7b70:['0x1f66eadfa10']
0x1f66ebb7510:['0x1f66eadff90', '0x1f66eadfb10']
0x1f66ebb6e50:['0x1f66eadfa10', '0x1f66eadff90', '0x1f66eadfa10']
0x1f66ebb6cd0:['0x1f66eadfb10', '0x1f66eadff90', '0x1f66eadfb10', '0x1f66eadff90']
0x1f66ebb7b70:['0x1f66eadfa10', '0x1f66eadfb10', '0x1f66eadfa10', '0x1f66eadfb10', '0x1f66eadfa10']


In [None]:
# if you want to go deeper, you can check a memory address by using the following code
#import ctypes
#g = (ctypes.c_char*40000).from_address(0x1103bbdc0)
#print(g.value.decode("utf-8", errors='ignore'))

## What makes numpy faster or slower?
Numpy arrays are stored at one continuous place in memory, so processes can access and manipulate them very efficiently.

This behavior is called locality of reference in computer science.

This is the main reason why NumPy is SOMETIMES faster than lists.

In [31]:
standard_append = \
'''
n = []
for i in range(10000):
    n.append(i)
'''
numpy_append = \
'''
import numpy as np
n = np.array([])
for i in range(10000):
    n = np.append(n, np.array(i))
'''

In [32]:
timeit.timeit(standard_append, number=100)

0.046452300040982664

In [None]:
timeit.timeit(numpy_append, number=100)

## NumPy Arrays
0-D Array: Scalar

1-D Array: Vector

2-D Array: Matrix

N-D Array

In [33]:
a_scalar = np.array(17)
print(a_scalar, type(a_scalar))

17 <class 'numpy.ndarray'>


In [34]:
a_vector = np.array([3, 5])
print(a_vector, type(a_vector))

[3 5] <class 'numpy.ndarray'>


In [35]:
a_vector_from_tuple = np.array((3, 5, 7))
print(a_vector_from_tuple, type(a_vector_from_tuple))

[3 5 7] <class 'numpy.ndarray'>


In [36]:
a_vector_from_set = np.array({3, 3, 3, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7})
print(a_vector_from_set, type(a_vector_from_set))

{3, 5, 7} <class 'numpy.ndarray'>


In [37]:
# indexing and slicing
print(a_vector[1])
print(a_vector_from_tuple[1:])
print(a_vector_from_set[1])

5
[5 7]


IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

In [38]:
an_array_with_chars = np.array(['a', 'Bora', 'Canbula', 7])
print(an_array_with_chars, type(an_array_with_chars))
# 7 here will be recognaized as string

['a' 'Bora' 'Canbula' '7'] <class 'numpy.ndarray'>


In [43]:
two_d_array = np.array([[1, 2, 3], [4, 5, 6]])
print(two_d_array, type(two_d_array))


[[1 2 3]
 [4 5 6]] <class 'numpy.ndarray'>


In [47]:
# indexing with multiple dimensions
print(two_d_array[:, 1:])
# higher dimensions
m_1 = np.array([[11, 12, 13],
                [14, 15, 16],
                [17, 18, 19]])
m_2 = np.array([[21, 22, 23],
                [24, 25, 26],
                [27, 28, 29]])
m_3 = np.array([[31, 32, 33],
                [34, 35, 36],
                [37, 38, 39]])
m = np.array([m_1, m_2, m_3])
print(m.shape,m.ndim)
print(m[:, 2, :].reshape(9))

[[2 3]
 [5 6]]
(3, 3, 3) 3
[17 18 19 27 28 29 37 38 39]


In [48]:
help(np.reshape)

Help on function reshape in module numpy:

reshape(a, newshape, order='C')
    Gives a new shape to an array without changing its data.
    
    Parameters
    ----------
    a : array_like
        Array to be reshaped.
    newshape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    order : {'C', 'F', 'A'}, optional
        Read the elements of `a` using this index order, and place the
        elements into the reshaped array using this index order.  'C'
        means to read / write the elements using C-like index order,
        with the last axis index changing fastest, back to the first
        axis index changing slowest. 'F' means to read / write the
        elements using Fortran-like index order, with the first index
        c

Type is not different so let's look at number of dimensions

In [49]:
print(a_scalar.ndim)
print(a_vector.ndim)
print(two_d_array.ndim)


0
1
2


Number of items in an array gives you the shape

In [50]:
print(a_scalar.shape)
print(a_vector.shape)
print(two_d_array.shape)

()
(2,)
(2, 3)


In [51]:
a_matrix = np.array([[1, 2, 3],
                     [4, 5, 6],
                     [7, 8, 9]])
print(a_matrix)
print(a_matrix.ndim)
print(a_matrix.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
2
(3, 3)


In [52]:
print(a_matrix.reshape(9, 1))

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]


In [53]:
a_matrix.reshape(9, 1)
print(a_matrix)
print(a_matrix.ndim)
print(a_matrix.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
2
(3, 3)


In [54]:
np.array([[1, 2, 3], [4, 5, 6]]).shape

(2, 3)

In [56]:
n_1 = [1, 2, 3]
n_2 = [4, 5, 6]
n_3 = [7, 8, 9]
n = np.array([n_1 + n_2 + n_3])

In [58]:
n

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [59]:
n = np.arange(10)
n.reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [60]:
np.arange(10).reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [61]:
n.reshape(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [62]:
a = np.array([0, 1, 2, 3, 4], ndmin=2)

In [63]:
a

array([[0, 1, 2, 3, 4]])

## View and Copy Concept in NumPy
view affected by the changes made to the original array
copy not affected by the changes made to the original array

In [64]:
n = np.arange(10)

In [65]:
m = n.view() # just returns a view of the array
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


In [67]:
m[0] = 7 # change the values of n

In [68]:
# both of them are affected
print(n)
print(m)

[7 1 2 3 4 5 6 7 8 9]
[7 1 2 3 4 5 6 7 8 9]


In [69]:
n = np.arange(10)
m = n.copy()
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


In [70]:
m[0] = 7

In [71]:
# the original array is not affected from its copy
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[7 1 2 3 4 5 6 7 8 9]


In [72]:
# you can check if an array is a view or not
n = np.arange(10)
n_view = n.view()
n_copy = n.copy()
print(n.base)
print(n_view.base) # this one returns the original array
print(n_copy.base)

None
[0 1 2 3 4 5 6 7 8 9]
None


## Data Types in Python vs NumPy

In [73]:
np.typecodes

{'Character': 'c',
 'Integer': 'bhilqp',
 'UnsignedInteger': 'BHILQP',
 'Float': 'efdg',
 'Complex': 'FDG',
 'AllInteger': 'bBhHiIlLqQpP',
 'AllFloat': 'efdgFDG',
 'Datetime': 'Mm',
 'All': '?bhilqpBHILQPefdgFDGSUVOMm'}

In [74]:
np.sctypes # scalar types?

{'int': [numpy.int8, numpy.int16, numpy.int32, numpy.int64],
 'uint': [numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64],
 'float': [numpy.float16, numpy.float32, numpy.float64],
 'complex': [numpy.complex64, numpy.complex128],
 'others': [bool, object, bytes, str, numpy.void]}

## Default Python Data Types
<ul>
<li>strings</li>
<li>integer</li>
<li>float</li>
<li>boolean</li>
<li>complex</li>
</ul>

## Data Types in NumPy
<ul>
<li> i integer</li>
<li> b boolean</li>
<li> u unsigned integer</li>
<li> f float</li>
<li> c complex float</li>
<li> m timedelta</li>
<li> M datetime</li>
<li> O object</li>
<li> S string </li>
<li> U unicode string</li>
<li> V fixed chunk of memory for other type</li>

</ul>

In [85]:
# check the data type of NumPy array
a = np.array([1, 2, 3])
a.dtype

dtype('int32')

In [84]:
# why int64? create array with a defined data type
a = np.array([1, 2, 3], dtype='i')
a.dtype

dtype('int32')

In [83]:
# why int32? try again
a = np.array([1, 2, 3], dtype=np.int8)
a.dtype

dtype('int8')

In [86]:
# check the limits
a = np.array([128], dtype=np.int8)
print(a.dtype)
print(a.dtype.name)
print(a)

int8
int8
[-128]


In [87]:
# what does unsigned mean?
a = np.array([256], dtype=np.uint8)
print(a.dtype)
print(a.dtype.name)
print(a)

uint8
uint8
[0]


In [88]:
# conversion errors
a = np.array(['a', '2', '3'], dtype='i')
print(a.dtype)
print(a)

ValueError: invalid literal for int() with base 10: 'a'

In [89]:
try:
    a = np.array(['a', '2', '3'], dtype='i')
except ValueError:
    print('There is something wrong with the values!')
finally:
    a = np.array([], dtype='i')
print(a.dtype)
print(a)

There is something wrong with the values!
int32
[]


## Initial Placeholders

In [90]:
print('Empty')
print(np.empty((4,4), dtype='f'))
print('Zeros')
print(np.zeros((2,3)))
print(np.zeros((2,2), dtype='i'))
print('Ones')
print(np.ones((2,3,4), dtype=np.complex128))
print("*"*40)
print(np.array([complex(1) for _ in range(24)]).reshape(2, 3, 4))
print('Linear Space')
print(np.linspace(1, 2, 10))
print('Filled with a default value')
print(np.full((3,3), 5))
print('Identity Matrix')
print(np.eye(3))
print('Random')
print(np.random.random((3,3)))
print(np.random.randint(0, 99, (3,3)))

Empty
[[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 2.6736775e-42 0.0000000e+00]
 [0.0000000e+00 1.0010205e-38 0.0000000e+00 1.0469402e-38]]
Zeros
[[0. 0. 0.]
 [0. 0. 0.]]
[[0 0]
 [0 0]]
Ones
[[[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]

 [[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]]
****************************************
[[[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]

 [[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]]
Linear Space
[1.         1.11111111 1.22222222 1.33333333 1.44444444 1.55555556
 1.66666667 1.77777778 1.88888889 2.        ]
Filled with a default value
[[5 5 5]
 [5 5 5]
 [5 5 5]]
Identity Matrix
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
Random
[[0.31579294 0.94826878 0.94722

## Array Operations
<ul>
<li> Element-wise Operations</li>
<li> Aggregate Operations</li>
</ul>

In [91]:
# default python list
a_py = [1, 2, 3]
b_py = [4, 5, 6]
c_py = a_py + b_py

In [92]:
# Element-wise operation: for each element
a_np = np.array([1, 2, 3])
b_np = np.array([4, 5, 6])
c_np = a_np + b_np

In [93]:
print(c_py)
print(c_np)

[1, 2, 3, 4, 5, 6]
[5 7 9]


In [94]:
c_py * 3

[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]

In [95]:
c_np * 3

array([15, 21, 27])

In [98]:
#c_np ** 2
#np.power(c_np, 2)
c_np + 1 # this is broadcasting (any -> np.array)

array([ 6,  8, 10])

In [99]:
c_np_like_python = np.append(a_np, b_np)
print(c_np_like_python)

[1 2 3 4 5 6]


In [102]:
# Aggregate operations
n = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
print(sum(n))
print(np.sum(n))
print(np.sum(n, axis=0))
print(np.sum(n, axis=1))
#print(np.sum(n, axis=2))
print('')
try:
    print(np.sum(n, axis=2))
except np.AxisError:
    for i in range(n.ndim):
        print(np.sum(n, axis=i))
finally:
    pass

[12 15 18]
45
[12 15 18]
[ 6 15 24]

[12 15 18]
[ 6 15 24]


In [101]:
help(np.sum)

Help on function sum in module numpy:

sum(a, axis=None, dtype=None, out=None, keepdims=<no value>, initial=<no value>, where=<no value>)
    Sum of array elements over a given axis.
    
    Parameters
    ----------
    a : array_like
        Elements to sum.
    axis : None or int or tuple of ints, optional
        Axis or axes along which a sum is performed.  The default,
        axis=None, will sum all of the elements of the input array.  If
        axis is negative it counts from the last to the first axis.
    
        .. versionadded:: 1.7.0
    
        If axis is a tuple of ints, a sum is performed on all of the axes
        specified in the tuple instead of a single axis or all the axes as
        before.
    dtype : dtype, optional
        The type of the returned array and of the accumulator in which the
        elements are summed.  The dtype of `a` is used by default unless `a`
        has an integer dtype of less precision than the default platform
        integer.  In 

## Practice

In [103]:
# initialize a 4-by-4 NumPy array with two-digits random integers
a = np.random.randint(0, 99, (4,4))
print(a)
# fill the center 2x2 matrix with -1
b = np.full((2,2), -1)
print(b)
a[1:3,1:3] = b
print(a)

[[76 12 32 74]
 [75 70  3 47]
 [11 69 56 79]
 [54 91 87  7]]
[[-1 -1]
 [-1 -1]]
[[76 12 32 74]
 [75 -1 -1 47]
 [11 -1 -1 79]
 [54 91 87  7]]


In [104]:
# generalize this for d digits n-by-n and m-by-m
def change_center(d, n, m):
    a = np.random.randint(0, 10**d - 1, (n,n))
    b = np.full((m,m), -1)
    print(a)
    print(b)
    k = (n-m)//2
    a[k:k+m,k:k+m] = b
    print(a)

In [105]:
change_center(2, 8, 6)

[[36 63 12 15 58 89 96 54]
 [56 65 52 98 81 80 50 38]
 [82 47 22 78 36 73 62 77]
 [ 0 50 58 70  9 76 64 88]
 [25 79 23 95 88 79 67 81]
 [71 31 77 30 67 54 91 44]
 [69 65 98 38 81 32 18 75]
 [ 1 75 67 28 30 89 72 97]]
[[-1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1 -1]]
[[36 63 12 15 58 89 96 54]
 [56 -1 -1 -1 -1 -1 -1 38]
 [82 -1 -1 -1 -1 -1 -1 77]
 [ 0 -1 -1 -1 -1 -1 -1 88]
 [25 -1 -1 -1 -1 -1 -1 81]
 [71 -1 -1 -1 -1 -1 -1 44]
 [69 -1 -1 -1 -1 -1 -1 75]
 [ 1 75 67 28 30 89 72 97]]
