# NumPy Basics

One of the reasons NumPy is so important for numerical computations in Python is that it is designed for efficiency on large arrays of data.

In [328]:
import numpy as np

In [329]:
# NumPy: square one million integers with a single vectorised array expression
# (the timing includes building the array with np.arange).
n_iter = 1_000_000
%timeit np.arange(n_iter) ** 2

2.11 ms ± 182 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [330]:
# Pure Python: the same squares from a generator expression materialised into a list
%timeit list((n**2 for n in range(n_iter)))

861 ms ± 40.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [331]:
# Pure Python: the same squares from a list comprehension
%timeit [n**2 for n in range(n_iter)]

287 ms ± 1.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### `ndarray`: A Multidimensional Array Object

In [332]:
# Element-wise arithmetic on a 2x3 float array
math_test = np.array([[1.5, 0.2, -1.2], [2.7, -1.9, -1.1]])

print(7 * math_test)  # the scalar is applied to every element
print(math_test + math_test)  # each element is added to its counterpart (itself)

[[ 10.5   1.4  -8.4]
 [ 18.9 -13.3  -7.7]]
[[ 3.   0.4 -2.4]
 [ 5.4 -3.8 -2.2]]


All `ndarrays` have these structural properties:

* Dimension: Number of indices
* Shape: Size of the array in each direction
* Size: Total number of elements in an array

In [333]:
# One property per output line: number of axes, extent along each axis, element count
print(
    f"Dimension: {math_test.ndim}",
    f"Shape: {math_test.shape}",
    f"Size: {math_test.size}",
    sep="\n",
)

Dimension: 2
Shape: (2, 3)
Size: 6


In [334]:
# Element data type of math_test
math_test.dtype

dtype('float64')

### Creating Arrays

In [335]:
# A flat list mixing ints and one float becomes a 1-D array
data1 = [1, 2, 3.2, 8, 7]
arr1 = np.array(data1)  # the ints are upcast so that every element is a float
arr1

array([1. , 2. , 3.2, 8. , 7. ])

In [336]:
# Nested lists of equal length become a 2-D array (here 2 rows x 4 columns)
data2 = [[1, 9, 3, 5], [3, 5, 2, 8]]
arr2 = np.array(data2)
arr2

array([[1, 9, 3, 5],
       [3, 5, 2, 8]])

In [337]:
# Report the dtype NumPy inferred for each array, labelled arr1, arr2, ...
[f"arr{pos}: {arr.dtype}" for pos, arr in enumerate((arr1, arr2), start=1)]

['arr1: float64', 'arr2: int64']

Unless a dtype is explicitly specified, `np.array` tries to infer a suitable dtype for the array it creates.

In [338]:
# Constructors that build an array of a given length (int) or shape (tuple):
print(np.zeros(5))  # 1-D, five zeros
print(np.zeros((2, 3)))  # 2-D, 2 rows x 3 columns of zeros
print(np.ones((2, 4, 3)))  # 3-D, 2 blocks of 4 rows x 3 columns of ones
print(np.empty(5))  # uninitialised memory: contents are arbitrary and need not be zeros

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]
[0. 0. 0. 0. 0.]


In [339]:
np.arange(1, 10, 2)  # like range(1, 10, 2): start 1, stop 10 (exclusive), step 2 -- but returns an array

array([1, 3, 5, 7, 9])

In [340]:
# Two ways to build a 5x5 identity matrix; only the last expression of the cell is displayed
np.identity(5)
# OR
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Data Type or dtype 

In [341]:
# An explicit dtype overrides inference: integer literals are stored as float64
d1 = np.array([1, 2, 3], dtype=np.float64)
d1.dtype

dtype('float64')

In [342]:
# Requesting an integer dtype for float input discards the fractional part (1.2 -> 1, 3.1 -> 3)
d2 = np.array([1.2, 2, 3.1], dtype=np.int32)
d2

array([1, 2, 3], dtype=int32)

A dtype name consists of a type name followed by the number of bits used per element (e.g. `float64`, `int32`, `uint8`).

In [343]:
# uint8 cannot represent -1. Passing the out-of-range Python int straight to
# np.array(..., dtype=np.uint8) is deprecated in NumPy 1.24 and raises
# OverflowError in NumPy 2.x, so build a signed array first and cast explicitly:
# astype wraps -1 around to 255 (modulo 2**8).
d3 = np.array([1, 0, -1]).astype(np.uint8)
d3

array([  1,   0, 255], dtype=uint8)

One can explicitly convert the dtype of an array:
* `astype` method returns a new array

In [344]:
# Cast to signed 8-bit: the stored 255 reads back as -1. astype returns a new array; d3 itself is unchanged.
d3.astype(np.int8)

array([ 1,  0, -1], dtype=int8)

In [345]:
# Store the numeric strings as fixed-width byte strings (dtype 'S3').
# np.bytes_ is used because the old alias np.string_ was removed in NumPy 2.0.
d4 = np.array(["4.1", "9.2", "3.9"], dtype=np.bytes_)
d4

array([b'4.1', b'9.2', b'3.9'], dtype='|S3')

In [346]:
# astype can also parse byte strings that hold numbers, here into 16-bit floats
d5 = d4.astype(np.float16)
d5

array([4.1, 9.2, 3.9], dtype=float16)

### Arithmetics

NumPy arrays are important because they enable you to express batch operations without writing a `for` loop. This is called ***vectorization***.

![Numpy Broadcasting](./imgs/numpy-broadcasting.png)

In [347]:
d_test1 = np.array([[1.2, 3, 2.1], [2, 2.5, 1.1], [1, 2.1, 3]], dtype=np.float64)  # shape (3, 3)
d_test2 = np.array([1, 2, 3], dtype=np.int8)  # shape (3,): broadcast as a row against every row of d_test1
d_test3 = np.array([[1], [2], [3]])  # shape (3, 1): broadcast as a column against every column of d_test1

print(d_test1 * d_test2)  # column j of d_test1 is multiplied by d_test2[j]
print(d_test1 / d_test3)  # row i of d_test1 is divided by d_test3[i, 0]

[[1.2 6.  6.3]
 [2.  5.  3.3]
 [1.  4.2 9. ]]
[[1.2        3.         2.1       ]
 [1.         1.25       0.55      ]
 [0.33333333 0.7        1.        ]]


Scalar propagation: NumPy broadcasts the scalar value into an array of the same shape as the array operand.

In [348]:
test_arr0 = d_test1 * d_test2
print(test_arr0)
# The scalar 1 is broadcast against every element, giving the element-wise reciprocal
test_arr1 = 1 / test_arr0
test_arr1

[[1.2 6.  6.3]
 [2.  5.  3.3]
 [1.  4.2 9. ]]


array([[0.83333333, 0.16666667, 0.15873016],
       [0.5       , 0.2       , 0.3030303 ],
       [1.        , 0.23809524, 0.11111111]])

In [349]:
# The exponent is a scalar too: every element is squared
test_arr2 = test_arr0 ** 2
test_arr2

array([[ 1.44, 36.  , 39.69],
       [ 4.  , 25.  , 10.89],
       [ 1.  , 17.64, 81.  ]])

Comparisons between arrays of the same size yield boolean arrays:

In [350]:
# Element-wise comparison; the only False is where both values equal 1.0 (1.0 > 1.0)
test_arr2 > test_arr1

array([[ True,  True,  True],
       [ True,  True,  True],
       [False,  True,  True]])

### Indexing & Slicing

In [351]:
# A plain Python list cannot have a scalar assigned to a slice:
# the right-hand side of a slice assignment must be an iterable.
l1 = [1, 2, 3]
try:
  l1[:2] = 2  # raises TypeError
except TypeError:
  # Catch only the expected error (a bare `except:` would also hide real bugs);
  # the failed assignment leaves l1 untouched.
  pass
l1

[1, 2, 3]

In [352]:
ages = np.array([2, 3, 4, 5, 7, 1, 5])
arr_sliced1 = ages[2:5]  # a window onto ages[2], ages[3], ages[4]
arr_sliced1[:2] = 0  # the scalar fills the first two positions of the window
# The writes made through the slice show up in the original array as well.
print(ages, arr_sliced1)

[2 3 0 0 7 1 5] [0 0 7]


If we assign a scalar value to a slice, NumPy propagates (broadcasts) the value to fill those indices. From the above example we see that changes in `arr_sliced1` are reflected in the original array `ages`. So, we can conclude that assigning new values through a slice mutates the original array.

In [353]:
# [:] addresses every element of the slice, so the scalar overwrites all three positions in place
arr_sliced1[:] = 7
print(ages, arr_sliced1)

[2 3 7 7 7 1 5] [7 7 7]


In [354]:
arr2d0 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d0)
# Two equivalent ways to read the element at row 2, column 0:
arr2d0[2][0]  # chained indexing: take row 2, then its element 0 (result not displayed)
# OR
arr2d0[2, 0]  # one multi-axis index; as the last expression this is the value displayed

[[1 2 3]
 [4 5 6]
 [7 8 9]]


7

In [355]:
# Three levels of list nesting give a 3-D array
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d)
print(f"Shape: {arr3d.shape} ~ 2 layers, 2 rows, 3 columns")

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
Shape: (2, 2, 3) ~ 2 layers, 2 rows, 3 columns


In [356]:
old_arr3d = arr3d[0].copy()  # independent copy of layer 0, taken before it is overwritten

arr3d[0] = 7  # the scalar is broadcast over the whole 2x3 layer
print(arr3d)
arr3d[0] = old_arr3d  # write the saved values back so later cells see the original data

[[[ 7  7  7]
  [ 7  7  7]]

 [[ 7  8  9]
  [10 11 12]]]


In [357]:
# layer 1, row 1, column 0
arr3d[1, 1, 0]

10

In [358]:
print(arr2d0)
# first 2 rows (indices 0 and 1); all columns are kept
print("first 2 rows:", arr2d0[:2])
# first 2 rows, first col: the integer column index drops that axis, so the result is 1-D
print("first 2 rows, first col:", arr2d0[:2, 0])
# last 2 cols of the 2nd row (row index 1)
print("last 2 cols of 2nd row:", arr2d0[1, 1:])


[[1 2 3]
 [4 5 6]
 [7 8 9]]
first 2 rows: [[1 2 3]
 [4 5 6]]
first 2 rows, first col: [1 4]
last 2 cols of 2nd row: [5 6]


In [359]:
# Mixing a slice (rows 1 onward) with an integer index (column 0) reduces the 2-D array to 1-D
lower_ndim = arr2d0[1:, 0]
print(lower_ndim)
lower_ndim.shape

[4 7]


(2,)

In [360]:
# last column, kept 2-D (shape (3, 1)) because a slice is used rather than an integer index
print(arr2d0[:, 2:])
# zero out the 2x2 block in the top right-hand corner (rows 0-1, columns 1-2)
old_arr2d0 = arr2d0.copy()  # snapshot taken before the in-place assignment
arr2d0[:2, 1:] = 0
print(arr2d0)
arr2d0 = old_arr2d0  # rebind the name to the unmodified snapshot

[[3]
 [6]
 [9]]
[[1 0 0]
 [4 0 0]
 [7 8 9]]


In [366]:
print(arr2d0[2:, :].shape)  # slicing rows with 2: keeps the row axis -> (1, 3)
print(arr2d0[2, :].shape)  # the integer row index 2 drops the row axis -> (3,)

(1, 3)
(3,)


In [368]:
# first 2 columns of the middle row (row index 1)
arr2d0[1, :2]

array([4, 5])

### Boolean Indexing