In [12]:
#hide
from utils import *
# Notebook header: a title string followed by a list of keyword strings.
# NOTE(review): `hc` is pulled in by the star-import of `utils`; how it renders
# these arguments is not visible from this notebook -- confirm against utils.
hc(
    "How to get comfortable with NumPy?",
    ["ndarray", "type", "ndim", "shape", "size", "APIs"],
)
# Table of contents: the section titles used by the markdown headings below.
# NOTE(review): `toc` also comes from `utils`; the meaning of the trailing `1`
# argument is not visible here -- confirm against utils.
toc(
    [
        "NumPy arrays over python list",
        "Scalars",
        "Arrays",
        "Array properties",
        "Array operations",
        "Operations along an axis"  
    ],
    1
)

## NumPy
NumPy is a python **library** which is optimised to **store numerical values** (generally) and **perform numerical operations**.

## NumPy arrays over python list
1. Creating a NumPy array is much faster than building a Python list.
2. NumPy provides a lot of convenient element-wise mathematical operations, for which we would otherwise have to write a loop over a list.
3. We can get rid of the Python `for` loop for many operations.
4. NumPy operations are much faster.

In [11]:
import numpy as np
import time

# Compare a plain Python loop against the equivalent vectorised NumPy call,
# first for creating 10 million integers and then for squaring them.
#
# The timestamps are deliberately not named `tic`/`toc`: `toc` is also the
# table-of-contents helper brought in by `from utils import *`, and rebinding
# it to a float would break a re-run of the header cell.
# `time.perf_counter()` is used because it is a monotonic, high-resolution
# clock meant for measuring short intervals; `time.time()` is wall-clock time
# and can jump if the system clock is adjusted.

#=========Comparing Creation time=========
# Python loop
start = time.perf_counter()
lst = []
for i in range(10_000_000):
    lst.append(i)
elapsed = time.perf_counter() - start
print(f"Total time taken to create list: {elapsed:.5f} sec")

# NumPy array
start = time.perf_counter()
arr = np.arange(10_000_000) # No need for python loop
elapsed = time.perf_counter() - start
print(f"Total time taken to create numpy array: {elapsed:.5f} sec")
print("-"*50)
#=========Comparing Operation time=========
# Python loop: square every element in place
start = time.perf_counter()
for i in range(len(lst)):
    lst[i] = lst[i] ** 2
elapsed = time.perf_counter() - start
print(f"Total time taken to perform operation on list: {elapsed:.5f} sec")

# NumPy array: one vectorised expression squares every element
start = time.perf_counter()
arr = arr ** 2 # No need for python loop
elapsed = time.perf_counter() - start
print(f"Total time taken to perform operation on numpy array: {elapsed:.5f} sec")

Total time taken to create list: 2.34560 sec
Total time taken to create numpy array: 0.04636 sec
--------------------------------------------------
Total time taken to perform operation on list: 3.88236 sec
Total time taken to perform operation on numpy array: 0.05914 sec


## Scalars

## Arrays
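An array (`np.ndarray`) is a collection of **NumPy scalar** objects, i.e. instances of `np.generic` subclasses such as `np.int64` or `np.float64`.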

In [70]:
# Build an array from a flat Python list of three integers.
arr = np.array([1, 1, 3])

## Array properties

### type
The type of any numpy array will be `np.ndarray`

In [86]:
# Show the class of `arr` together with its base classes (method resolution order).
type(arr).__mro__

(numpy.ndarray, object)

### ndim
Every `np.ndarray` object has an `ndim` attribute that returns the number of dimensions (axes) of the array as an `int`.

In [4]:
# ndim == 0: a single number wrapped in an array (no container structure).
arr = np.array(1)
print(f"arr={arr}, \tndim={arr.ndim}\n")

# Each extra level of list nesting adds one dimension, even when the
# innermost lists are empty.
for nested in (
    [],                      # ndim 1: empty vector
    [1, 2, 3],               # ndim 1: vector
    [[], []],                # ndim 2: two empty rows
    [[1, 2, 3], [5, 6, 7]],  # ndim 2: matrix
    [[1, 2, 3]],             # ndim 2: matrix with a single row
    [[[1, 2, 3]]],           # ndim 3: one matrix holding a single row
):
    arr = np.array(nested)
    print(f"arr={arr}, ndim={arr.ndim}\n")

arr=1, 	ndim=0

arr=[], ndim=1

arr=[1 2 3], ndim=1

arr=[], ndim=2

arr=[[1 2 3]
 [5 6 7]], ndim=2

arr=[[1 2 3]], ndim=2

arr=[[[1 2 3]]], ndim=3



### shape
Every `np.ndarray` object has a `shape` attribute that returns a `tuple` of length `ndim`, giving the number of elements along each axis.

In [5]:
# 0-D array: the shape is the empty tuple.
arr = np.array(1)
print(f"arr={arr}, \tndim={arr.ndim}, shape={arr.shape}\n")

# The shape tuple has one entry per nesting level, outermost first.
for nested in (
    [],                      # (0,)      empty vector
    [1, 2, 3],               # (3,)      vector
    [[], []],                # (2, 0)    two empty rows
    [[1, 2, 3], [5, 6, 7]],  # (2, 3)    matrix
    [[1, 2, 3]],             # (1, 3)    row vector
    [[1], [2], [3]],         # (3, 1)    column vector
    [[[1, 2, 3]]],           # (1, 1, 3) 3-D array
):
    arr = np.array(nested)
    print(f"arr={arr}, ndim={arr.ndim}, shape={arr.shape}\n")

arr=1, 	ndim=0, shape=()

arr=[], ndim=1, shape=(0,)

arr=[1 2 3], ndim=1, shape=(3,)

arr=[], ndim=2, shape=(2, 0)

arr=[[1 2 3]
 [5 6 7]], ndim=2, shape=(2, 3)

arr=[[1 2 3]], ndim=2, shape=(1, 3)

arr=[[1]
 [2]
 [3]], ndim=2, shape=(3, 1)

arr=[[[1 2 3]]], ndim=3, shape=(1, 1, 3)



### size
Every `np.ndarray` object has a `size` attribute that returns an `int`: the total number of scalar elements in the array, i.e. the product of the entries of `shape`.

In [6]:
# 0-D array: it still holds exactly one element, so size is 1.
arr = np.array(1)
print(f"arr={arr}, \tndim={arr.ndim}, shape={arr.shape} size={arr.size}\n")

# size counts every scalar element, so it is 0 whenever any axis has length 0.
for nested in (
    [],                      # size 0
    [1, 2, 3],               # size 3
    [[], []],                # size 0 (shape (2, 0))
    [[1, 2, 3], [5, 6, 7]],  # size 6
    [[1, 2, 3]],             # size 3 (row vector)
    [[1], [2], [3]],         # size 3 (column vector)
    [[[1, 2, 3]]],           # size 3 (3-D array)
):
    arr = np.array(nested)
    print(f"arr={arr}, ndim={arr.ndim}, shape={arr.shape} size={arr.size}\n")

arr=1, 	ndim=0, shape=() size=1

arr=[], ndim=1, shape=(0,) size=0

arr=[1 2 3], ndim=1, shape=(3,) size=3

arr=[], ndim=2, shape=(2, 0) size=0

arr=[[1 2 3]
 [5 6 7]], ndim=2, shape=(2, 3) size=6

arr=[[1 2 3]], ndim=2, shape=(1, 3) size=3

arr=[[1]
 [2]
 [3]], ndim=2, shape=(3, 1) size=3

arr=[[[1 2 3]]], ndim=3, shape=(1, 1, 3) size=3



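### dtype
Every `np.ndarray` object has a `dtype` attribute that returns the type of the scalar objects stored in the array. All elements of an array share one dtype. An empty array defaults to `float64`; otherwise NumPy infers a common dtype that can represent every element (for example, mixing `int` and `float` values gives `float64`). The dtype can also be set explicitly with the `dtype` argument when creating the `np.ndarray` object.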

In [87]:
# Print the dtype NumPy infers for different kinds of input values.
for values in (
    [],                 # nothing to infer from
    [1, 2, 3],          # integers only
    [1, 2., 3],         # one float promotes every element to float
    [1, 2.5, 3],        # same promotion with a non-integral float
    [True, False],      # booleans only
    ["a", "b", 1],      # 1 is converted to str
    [True, False, 10],  # booleans become integers: True -> 1, False -> 0
):
    arr = np.array(values)
    print(f"{arr.dtype}\n")

float64

int64

float64

float64

bool

<U21

int64



## Array operations
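Any array operation results in one of the following:
1. Same sized array (element-wise transformations such as arithmetic)
2. Different sized array (e.g. concatenation)
3. Scalar (aggregations such as sum or mean)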

In [46]:
arr = np.array([1, 2, 3, 4])

# Arithmetic with a scalar is applied element by element: the result has the
# same shape as `arr`, only the values change.
for result in (
    arr + 2,   # add
    arr - 2,   # subtract
    arr * 2,   # multiply
    arr / 2,   # true division
    arr // 2,  # floor division
    arr ** 2,  # exponentiation
):
    print(result)

# Other functions that also return one output value per input element.
for func in (
    np.sin,
    np.cos,
    np.tan,
    np.log,
    np.log2,
    np.log10,
    np.exp,
    np.cumsum,   # running sum
    np.cumprod,  # running product
):
    print(func(arr))

[3 4 5 6]
[-1  0  1  2]
[2 4 6 8]
[0.5 1.  1.5 2. ]
[0 1 1 2]
[ 1  4  9 16]
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
[ 0.54030231 -0.41614684 -0.9899925  -0.65364362]
[ 1.55740772 -2.18503986 -0.14254654  1.15782128]
[0.         0.69314718 1.09861229 1.38629436]
[0.        1.        1.5849625 2.       ]
[0.         0.30103    0.47712125 0.60205999]
[ 2.71828183  7.3890561  20.08553692 54.59815003]
[ 1  3  6 10]
[ 1  2  6 24]


In [15]:
# Concatenation => Transformation that results in different sized array
concat_arr = np.concatenate([arr, arr, [1, 2, 1, 1]])
print(concat_arr, concat_arr.shape)

[1 2 3 4 1 2 3 4 1 2 1 1] (12,)


In [43]:
# Aggregation ops => Transformation that reduces dimension
arr = np.array([1, 2, 3, 4, 5])

print(np.sum(arr))
print(np.prod(arr))
print(np.mean(arr)) # sum / n.o. element
print(np.median(arr)) # centre value after sorting
print(np.min(arr))
print(np.max(arr))

print(np.argmin(arr)) # Index where mininum occured
print(np.argmax(arr)) # Index where mininum occured

15
120
3.0
3.0
1
5
0
4


## Operations along an axis

Any aggregation operation in numpy takes `axis` as a parameter to perform aggregation along that axis.

### 2D

In [117]:
# 2D
arr = np.array([[1, 2, 3], [1, 1, 1], [1, 2, 1]])

# Shape: (3, 3) 
# Sum: Axis 0: [1, 2, 3] + [1, 1, 1] + [1, 2, 1] = [3, 5, 5]; 
# Sum: Axis 1: [6, 3, 4]
print(arr.shape)
print(np.sum(arr, axis=0))
print(np.sum(arr, axis=1))

(3, 3)
[3 5 5]
[6 3 4]


### 3D
<div align="center">
  <img src="../data/images/martix_aggregation.png" width="700">
</div>

In [23]:
# 3D
arr = np.array([[[1, 2, 3], [1, 1, 1], [1, 2, 1]], [[1, 1, 3], [5, 1, 2], [1, 2, 2]]])

# Shape: (2, 3, 3) # arr[0, 1, :]

# Sum: Axis 0 (3, 3): [[1, 2, 3], [1, 1, 1], [1, 2, 1]] + [[1, 1, 3], [5, 1, 2], [1, 2, 2]] 
# = [[2, 3, 6], [6, 2, 3], [2, 4, 3]]  -> (3, 3) 
# Sum: Axis 1 (2, 3): [[1, 2, 3], [1, 1, 3]] + [[1, 1, 1], [5, 1, 2]] + [[1, 2, 1], [1, 2, 2]]
# = [[3, 5, 5], [7, 4, 7]]
# Sum: Axis 2 (2, 3): [[1, 1, 1], [1, 5, 1]] + [[2, 1, 2], [1, 1, 2]] + [[3, 1, 1], [3, 2, 2]]
# = [[6, 3, 4], [5, 8, 5]]
print(arr.shape)
print(np.sum(arr, axis=None)) # This is by default, all the axes will be collapsed
print(np.sum(arr, axis=0))
print(np.sum(arr, axis=1))
print(np.sum(arr, axis=2))

(2, 3, 3)
31
[[2 3 6]
 [6 2 3]
 [2 4 3]]
[[3 5 5]
 [7 4 7]]
[[6 3 4]
 [5 8 5]]


## APIs to generate numpy arrays

`numpy` provides several APIs to generate some special arrays. Some of these are:
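1. `arange`: Generates an array of values from start to end (end not included) with a given step size.
2. `linspace`, `logspace`: Generate a given number of points between start and end (both included), evenly spaced on a linear scale (`linspace`) or on a log scale (`logspace`, where start and end are exponents of the base, 10 by default).
3. `zeros`, `zeros_like`: Generate an array with all elements zero, either of a given shape (`zeros`) or with the same shape as an existing array (`zeros_like`).
4. `ones`, `ones_like`: Generate an array with all elements one, either of a given shape (`ones`) or with the same shape as an existing array (`ones_like`).
5. `random`: Generates an array of random numbers.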

In [27]:
# Integers from `start` up to, but not including, `stop`, in increments of `step`.
start, stop, step = 0, 100, 1
y = np.arange(start, stop, step)
print(y)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [35]:
# 101 evenly spaced points from 0 to 100, both endpoints included.
y = np.linspace(0, 100, num=101)
print(y)

# 11 points from 10**0 to 10**10, evenly spaced in the exponent.
y = np.logspace(0, 10, num=11)
print(y)

[  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
  14.  15.  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.  26.  27.
  28.  29.  30.  31.  32.  33.  34.  35.  36.  37.  38.  39.  40.  41.
  42.  43.  44.  45.  46.  47.  48.  49.  50.  51.  52.  53.  54.  55.
  56.  57.  58.  59.  60.  61.  62.  63.  64.  65.  66.  67.  68.  69.
  70.  71.  72.  73.  74.  75.  76.  77.  78.  79.  80.  81.  82.  83.
  84.  85.  86.  87.  88.  89.  90.  91.  92.  93.  94.  95.  96.  97.
  98.  99. 100.]
[1.e+00 1.e+01 1.e+02 1.e+03 1.e+04 1.e+05 1.e+06 1.e+07 1.e+08 1.e+09
 1.e+10]


In [43]:
# An integer gives a 1-D array of zeros; a tuple gives an N-D array.
for shape in (10, (10, 2)):
    y = np.zeros(shape)
    print(y)

# zeros_like copies the shape of an existing array instead of taking one.
y1 = np.zeros_like(y)
print(y1)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [42]:
# An integer gives a 1-D array of ones; a tuple gives an N-D array.
for shape in (10, (10, 2)):
    y = np.ones(shape)
    print(y)

# ones_like copies the shape of an existing array instead of taking one.
y1 = np.ones_like(y)
print(y1)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]
[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


In [47]:
# Draw a (10, 3) array of uniform random floats in [0, 1).
# A seeded Generator is used instead of the unseeded global RNG so that the
# numbers are the same every time the notebook is re-run.
rng = np.random.default_rng(42)
y = rng.random((10, 3))
y

array([[0.84281802, 0.16563977, 0.31606272],
       [0.74371553, 0.0759661 , 0.82426233],
       [0.3986615 , 0.90084362, 0.89352666],
       [0.44179205, 0.26577335, 0.3773855 ],
       [0.12142507, 0.25007688, 0.32239269],
       [0.1171167 , 0.73637184, 0.25766493],
       [0.84185338, 0.80460354, 0.23434395],
       [0.03124573, 0.80635074, 0.42089767],
       [0.3876257 , 0.02402851, 0.79147764],
       [0.9749466 , 0.63237049, 0.11903628]])