# Numpy Basics

One of the reasons NumPy is so important for numerical computations in Python is that it is designed for efficiency on large arrays of data.

In [2]:
import numpy as np

In [3]:
# NumPy: square one million integers with a single vectorized expression
n_iter = 1_000_000  # number of elements; reused by the pure-Python timings below
%timeit np.arange(n_iter) ** 2

3.4 ms ± 103 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
# Pure Python: the same squares from a generator expression materialized into a list
%timeit list((n**2 for n in range(n_iter)))

232 ms ± 4.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Pure Python: the same squares from a list comprehension
%timeit [n**2 for n in range(n_iter)]

219 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### `ndarray`: A Multidimensional Array Object

In [6]:
# Element-wise arithmetic on a 2x3 array of floats
math_test = np.array(
    [
        [1.5, 0.2, -1.2],
        [2.7, -1.9, -1.1],
    ]
)

scaled = math_test * 7  # the scalar is applied to every element
doubled = math_test + math_test  # elements at matching positions are added together
print(scaled)
print(doubled)

[[ 10.5   1.4  -8.4]
 [ 18.9 -13.3  -7.7]]
[[ 3.   0.4 -2.4]
 [ 5.4 -3.8 -2.2]]


All `ndarrays` have these structural properties:

* Dimension: Number of indices
* Shape: Size of the array in each direction
* Size: Total number of elements in an array

In [7]:
# One structural property per output line
print(
    "\n".join(
        [
            f"Dimension: {math_test.ndim}",
            f"Shape: {math_test.shape}",
            f"Size: {math_test.size}",
        ]
    )
)

Dimension: 2
Shape: (2, 3)
Size: 6


In [8]:
# dtype: the element type of the array
math_test.dtype

dtype('float64')

### Creating Arrays

In [9]:
# Ints mixed with one float: the inferred dtype is float64, so every element is stored as a float
data1 = [1, 2, 3.2, 8, 7]
arr1 = np.array(data1)
arr1

array([1. , 2. , 3.2, 8. , 7. ])

In [10]:
# A list of equal-length lists becomes a 2-D array; all-integer input yields an integer dtype
data2 = [[1, 9, 3, 5], [3, 5, 2, 8]]
arr2 = np.array(data2)
arr2

array([[1, 9, 3, 5],
       [3, 5, 2, 8]])

In [11]:
# Label each array with its 1-based position and report the dtype NumPy inferred for it
[f"arr{position}: {array.dtype}" for position, array in enumerate((arr1, arr2), start=1)]

['arr1: float64', 'arr2: int64']

Unless explicitly specified, `np.array` will try to infer the dtype for the array that was created.

In [12]:
# Arrays of a given length or shape, pre-filled with a constant:
print(np.zeros(5))  # 1-D, length 5
print(np.zeros((2, 3)))  # 2 rows, 3 columns
print(np.ones((2, 4, 3)))  # 2 layers, 4 rows, 3 columns
print(np.empty(5))  # not safe: memory is left uninitialized, so it may hold non-zero values

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]
[0. 0. 0. 0. 0.]


In [13]:
# start=1, stop=10 (exclusive), step=2 -- same semantics as the built-in range function
np.arange(1, 10, 2)

array([1, 3, 5, 7, 9])

In [14]:
# 5x5 identity matrix (this result is not displayed: only the last expression of a cell is shown)
np.identity(5)
# OR: np.eye(5) builds the same matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Data Type or dtype 

In [15]:
# Request float64 explicitly even though every input value is an int
d1 = np.array([1, 2, 3], dtype=np.float64)
d1.dtype

dtype('float64')

In [16]:
# Forcing an integer dtype drops the fractional part of float inputs (1.2 -> 1, 3.1 -> 3)
d2 = np.array([1.2, 2, 3.1], dtype=np.int32)
d2

array([1, 2, 3], dtype=int32)

A dtype name is the type name followed by the number of bits used per element of the array (for example `float64`, `int32`, `uint8`).

In [17]:
# uint8 can only hold 0..255. Build the array with the default integer dtype and
# then cast, so that -1 wraps around to 255. Passing dtype=np.uint8 directly with
# an out-of-range Python int is deprecated in NumPy 1.24 and raises OverflowError
# in NumPy 2.
d3 = np.array([1, 0, -1]).astype(np.uint8)
d3

array([  1,   0, 255], dtype=uint8)

One can explicitly convert the dtype of an array:
* `astype` method returns a new array

In [18]:
# astype returns a new array; 255 does not fit in int8 and comes back as -1
d3.astype(np.int8)

array([ 1,  0, -1], dtype=int8)

In [19]:
# Store the strings as fixed-width byte strings (dtype 'S3').
# np.bytes_ is used because the np.string_ alias was removed in NumPy 2.0.
d4 = np.array(["4.1", "9.2", "3.9"], dtype=np.bytes_)
d4

array([b'4.1', b'9.2', b'3.9'], dtype='|S3')

In [20]:
# Byte strings that represent numbers can be converted to a numeric dtype with astype
d5 = d4.astype(np.float16)
d5

array([4.1, 9.2, 3.9], dtype=float16)

### Arithmetics

NumPy arrays are important because they enable you to express batch operations without writing a `for` loop. This is called ***vectorization***. 

![Numpy Broadcasting](./imgs/numpy-broadcasting.png)

In [21]:
# Operands of different shapes are broadcast against each other.
# shape (3, 3), float64
d_test1 = np.array(
    [
        [1.2, 3, 2.1],
        [2, 2.5, 1.1],
        [1, 2.1, 3],
    ],
    dtype=np.float64,
)
# shape (3,): lined up with the columns, so each row is multiplied by [1, 2, 3]
d_test2 = np.array([1, 2, 3], dtype=np.int8)
# shape (3, 1): one divisor per row
d_test3 = np.array([[1], [2], [3]])

print(d_test1 * d_test2)
print(d_test1 / d_test3)

[[1.2 6.  6.3]
 [2.  5.  3.3]
 [1.  4.2 9. ]]
[[1.2        3.         2.1       ]
 [1.         1.25       0.55      ]
 [0.33333333 0.7        1.        ]]


Scalar propagation: NumPy broadcasts the scalar value into an array of the same shape as the array operand.

In [22]:
# Element-wise product of the two broadcast operands
test_arr0 = d_test1 * d_test2
print(test_arr0)
# The scalar 1 is broadcast, giving the reciprocal of every element
test_arr1 = 1 / test_arr0
test_arr1

[[1.2 6.  6.3]
 [2.  5.  3.3]
 [1.  4.2 9. ]]


array([[0.83333333, 0.16666667, 0.15873016],
       [0.5       , 0.2       , 0.3030303 ],
       [1.        , 0.23809524, 0.11111111]])

In [23]:
# Element-wise square: the scalar exponent is applied to every element
test_arr2 = test_arr0 ** 2
test_arr2

array([[ 1.44, 36.  , 39.69],
       [ 4.  , 25.  , 10.89],
       [ 1.  , 17.64, 81.  ]])

Comparisons between arrays of the same size yield boolean arrays:

In [24]:
# Element-wise comparison; the result is a boolean array of the same shape
test_arr2 > test_arr1

array([[ True,  True,  True],
       [ True,  True,  True],
       [False,  True,  True]])

### Basic Indexing & Slicing

![Numpy Matrix Axes](imgs/np-matrix-axes.png)

In [25]:
# A plain Python list cannot broadcast a scalar across a slice:
# slice assignment requires an iterable on the right-hand side.
l1 = [1, 2, 3]
try:
    l1[:2] = 2  # raises TypeError
except TypeError:
    # Expected failure: only TypeError is caught, so any other error would
    # still surface. The list is left unchanged.
    pass
l1

[1, 2, 3]

In [26]:
ages = np.array([2, 3, 4, 5, 7, 1, 5])
# A slice of an array is a view onto the same data, not a copy
arr_sliced1 = ages[2:5]
# Writing through the view: the scalar fills the first two slots of the slice ...
arr_sliced1[:2] = 0
# ... and the same elements change in the original array
print(ages, arr_sliced1)

[2 3 0 0 7 1 5] [0 0 7]


If we assign a scalar value to a slice, NumPy propagates (broadcasts) the value to fill those indices. From the above example we see that changes made through `arr_sliced1` are reflected in the original array `ages`. So, we can conclude that assigning new values through a slice mutates the original array.

In [27]:
# [:] addresses every element of the slice; the scalar fills all of them,
# and the matching elements of `ages` change as well
arr_sliced1[:] = 7
print(ages, arr_sliced1)

[2 3 7 7 7 1 5] [7 7 7]


In [28]:
arr2d0 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d0)
# Chained indexing: take row 2, then element 0 of that row (result not displayed)
arr2d0[2][0]
# OR: a single index with one entry per axis selects the same element
arr2d0[2, 0]

[[1 2 3]
 [4 5 6]
 [7 8 9]]


7

In [29]:
# Two 2x3 blocks stacked along a new leading axis
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d)
print(f"Shape: {arr3d.shape} ~ 2 layers, 2 rows, 3 columns")

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
Shape: (2, 2, 3) ~ 2 layers, 2 rows, 3 columns


In [30]:
# Keep a copy of the first layer so it can be restored afterwards
old_arr3d = arr3d[0].copy()

# A scalar assigned to a whole layer is broadcast to every element of it
arr3d[0] = 7
print(arr3d)
# An array of matching shape can be assigned too: put the saved values back
arr3d[0] = old_arr3d

[[[ 7  7  7]
  [ 7  7  7]]

 [[ 7  8  9]
  [10 11 12]]]


In [31]:
# layer 1, row 1, column 0
arr3d[1, 1, 0]

10

In [32]:
print(arr2d0)
# :2 slices the row axis (rows 0 and 1)
print("first 2 rows:", arr2d0[:2])
# row slice combined with an integer column index
print("first 2 rows, first col:", arr2d0[:2, 0])
# integer row index (row 1 is the 2nd row) combined with a column slice
print("last 2 cols of 2nd row:", arr2d0[1, 1:])


[[1 2 3]
 [4 5 6]
 [7 8 9]]
first 2 rows: [[1 2 3]
 [4 5 6]]
first 2 rows, first col: [1 4]
last 2 cols of 2nd row: [5 6]


In [33]:
# An integer index removes its axis: rows 1 onward of column 0 come back as a 1-D array
lower_ndim = arr2d0[1:, 0]
print(lower_ndim)
lower_ndim.shape

[4 7]


(2,)

In [34]:
# last col, selected with a slice (2:) so the result stays 2-D
print(arr2d0[:, 2:])
# zero out the 2x2 block in the top right-hand corner (rows 0-1, cols 1-2)
old_arr2d0 = arr2d0.copy()  # snapshot of the original values
arr2d0[:2, 1:] = 0
print(arr2d0)
arr2d0 = old_arr2d0  # rebind the name to the unmodified snapshot

[[3]
 [6]
 [9]]
[[1 0 0]
 [4 0 0]
 [7 8 9]]


In [35]:
# A slice keeps its axis, so the result is 2-D
print(arr2d0[2:, :].shape)
# An integer index drops its axis, so the result is 1-D
print(arr2d0[2, :].shape)

(1, 3)
(3,)


In [36]:
# first 2 elements of the middle row (row 1)
arr2d0[1, :2]

array([4, 5])

### Boolean Indexing

Let's assume that the following data sets are related.

In [37]:
# One name per row of `profit`
names = np.array(["Bob", "Vik", "Joe", "Vik", "Bob", "Joe", "Vik"])
# Half-yearly profit in millions of dollars
profit = np.array(
    [
        [1, 2],
        [5, 8],
        [1, -2],
        [9, 7],
        [3, 2],
        [-7, 7],
        [9, 7],
    ]
)

# Boolean mask: True where the name is "Vik".
# ~cond negates the mask, which is the same as names != "Vik".
cond = names == "Vik"
vik_profit = profit[cond]  # keeps only the rows where the mask is True
vik_profit

array([[5, 8],
       [9, 7],
       [9, 7]])

In [38]:
# ~cond negates the mask; 1: keeps only the second column (as a 2-D result)
rest_second_half_prof = profit[~cond, 1:]
rest_second_half_prof

array([[ 2],
       [-2],
       [ 2],
       [ 7]])

**Note**: The Python keywords `and` and `or` do not work with boolean arrays. We need to use the `&` and `|` operators instead.

In [39]:
# Element-wise OR of two masks. The parentheses are required because | binds
# more tightly than ==. With these names the result matches ~cond.
not_vik_cond = (names == "Joe") | (names == "Bob")
profit[not_vik_cond]

array([[ 1,  2],
       [ 1, -2],
       [ 3,  2],
       [-7,  7]])

In [40]:
# set all negative values in profit to zero (modifies `profit` in place)
profit[profit < 0] = 0  # only the values that satisfy the condition are set to zero
profit

array([[1, 2],
       [5, 8],
       [1, 0],
       [9, 7],
       [3, 2],
       [0, 7],
       [9, 7]])

### Fancy Indexing

In [41]:
# Build an 8x4 int32 array in which every element of row i equals i
fancy_arr = np.zeros((8, 4), dtype=np.int32)
# A column of row numbers, shape (8, 1), is broadcast across the 4 columns
fancy_arr[:] = np.arange(8).reshape(8, 1)

# fancy indexing: a list of row indices selects those rows, in the order given
fancy_arr[[4, 0, 1, -2]]

array([[4, 4, 4, 4],
       [0, 0, 0, 0],
       [1, 1, 1, 1],
       [6, 6, 6, 6]], dtype=int32)

In [42]:
# The integers 0..31 laid out as 8 rows of 4
fan_arr1 = np.arange(32).reshape((8, 4))
fan_arr1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [43]:
# rows 2, 3, 1, 0 -- in that order
fan_arr1[[2, 3, 1, 0]]

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [ 4,  5,  6,  7],
       [ 0,  1,  2,  3]])

In [44]:
# The two index arrays are paired element-wise: picks (2, 0), (3, 1), (1, 2), (0, 3),
# i.e. the diagonal [\] of the row-reordered array
fan_arr1[[2, 3, 1, 0], np.arange(4)]

array([ 8, 13,  6,  3])

In [45]:
# Columns counted down 3, 2, 1, 0: picks (2, 3), (3, 2), (1, 1), (0, 0),
# i.e. the anti-diagonal [/] of the row-reordered array
fan_arr1[[2, 3, 1, 0], np.arange(3, -1, -1)]

array([11, 14,  5,  0])

In [46]:
# Row and column lists are paired up: the elements at (4, 3) and (1, 2) are selected
fan_arr1[[4, 1], [3, 2]]

array([19,  6])

In [47]:
# Two steps: first pick rows 2, 0, 1; then reorder the columns of that result as 2, 1, 0, 3.
# This gives a rectangular subset of the matrix with shuffled columns.
fan_arr1[[2, 0, 1]][:, [2, 1, 0, 3]]

array([[10,  9,  8, 11],
       [ 2,  1,  0,  3],
       [ 6,  5,  4,  7]])

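Fancy indexing, unlike slicing, always copies the selected data into a new array when it is used to read values (for example, when the result is assigned to a new variable). When a fancy index appears on the left-hand side of an assignment, however, the selected elements of the original array are modified in place.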

In [48]:
new_fan = np.arange(16).reshape(4, 4)
# A fancy index on the left-hand side of an assignment writes into the original
# array: the elements at (2, 1), (3, 3) and (1, 0) are set to zero in place
row_idx, col_idx = [2, 3, 1], [1, 3, 0]
new_fan[row_idx, col_idx] = 0
print(new_fan)

[[ 0  1  2  3]
 [ 0  5  6  7]
 [ 8  0 10 11]
 [12 13 14  0]]


### Transposing Arrays & Swapping Axes

Transposing returns a view of the data without copying anything.

In [54]:
# The integers 0..14 laid out as 5 rows of 3
trans_arr = np.arange(15).reshape((5, 3))
trans_arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [55]:
# .T swaps rows and columns: shape (5, 3) -> (3, 5)
trans_arr.T

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

Inner matrix product, using `np.dot`

In [59]:
# (3, 5) times (5, 3) -> shape: (3, 3)
np.dot(trans_arr.T, trans_arr)

array([[270, 300, 330],
       [300, 335, 370],
       [330, 370, 410]])

In [60]:
# we can also use the @ infix operator for the matrix product
trans_arr @ trans_arr.T  # (5, 3) times (3, 5) -> shape: (5, 5)

array([[  5,  14,  23,  32,  41],
       [ 14,  50,  86, 122, 158],
       [ 23,  86, 149, 212, 275],
       [ 32, 122, 212, 302, 392],
       [ 41, 158, 275, 392, 509]])

In [73]:
# The integers 0..23 laid out as 2 layers of 3 rows by 4 columns
trans_3d = np.arange(24).reshape((2, 3, 4))
trans_3d

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

![3D Matrix Transpose](imgs/3d-matrix-transpose.png)

In [74]:
# we can provide the new order of the axes: (1, 0, 2) swaps the first two
# and leaves the last in place, so shape (2, 3, 4) becomes (3, 2, 4)
trans_3d.transpose(1, 0, 2)

array([[[ 0,  1,  2,  3],
        [12, 13, 14, 15]],

       [[ 4,  5,  6,  7],
        [16, 17, 18, 19]],

       [[ 8,  9, 10, 11],
        [20, 21, 22, 23]]])

![3D Matrix Transposed](imgs/3d-matrix-transposed.png)

The figure above shows how the axes have been rearranged into the given order.

In [80]:
# swapaxes exchanges exactly two axes; swapping 0 and 1 gives the same result as transpose(1, 0, 2)
trans_3d.swapaxes(0,1)

array([[[ 0,  1,  2,  3],
        [12, 13, 14, 15]],

       [[ 4,  5,  6,  7],
        [16, 17, 18, 19]],

       [[ 8,  9, 10, 11],
        [20, 21, 22, 23]]])

In [82]:
# For a 2-D array, swapping its two axes gives the same result as .T
trans_arr.swapaxes(1, 0)

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])