# NumPy

NumPy is a powerful linear algebra library for Python. What makes it so important is that almost all of the libraries in the PyData ecosystem (pandas, scipy, scikit-learn, etc.) rely on NumPy as one of their main building blocks.

NumPy is also incredibly fast, as it has bindings to C libraries.

In [1]:
import numpy as np

## A) - Creating Arrays

1. From a python list
2. Built-in methods
        a. arange
        b. zeros, ones, empty
        c. linspace
        d. eye
3. Random
        a. rand
        b. randn
        c. randint
        d. normal

### 1. From a python list

We can create an array by directly converting a list or list of lists.

In [2]:
a = [[1,2,3],[4,5,6],[7,8,9]]
display(a, type(a))

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

list

In [3]:
b = np.array(a)
display(b, type(b))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

numpy.ndarray

### 2. Built-in Methods

In [6]:
# arange

a = np.arange(0,10)
b = np.arange(0,14,2)
display(a, b)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

array([ 0,  2,  4,  6,  8, 10, 12])

In [7]:
# zeros

display(np.zeros(5), np.zeros([5,5]))

array([0., 0., 0., 0., 0.])

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [8]:
# ones

display(np.ones(3), np.ones([3,3]))

array([1., 1., 1.])

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [59]:
# empty

np.empty((2, 3))

array([[1.72723371e-077, 1.72723371e-077, 1.48219694e-323],
       [0.00000000e+000, 0.00000000e+000, 4.17201348e-309]])

In [58]:
# linspace - evenly spaced numbers over a specified interval

np.linspace(0,5,20)

array([0.        , 0.26315789, 0.52631579, 0.78947368, 1.05263158,
       1.31578947, 1.57894737, 1.84210526, 2.10526316, 2.36842105,
       2.63157895, 2.89473684, 3.15789474, 3.42105263, 3.68421053,
       3.94736842, 4.21052632, 4.47368421, 4.73684211, 5.        ])

In [10]:
# eye - identity matrix

np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### 3. Random

Random number arrays

In [11]:
# rand
# creates an array of the given shape and populates it with random samples from a uniform distribution over [0, 1)

display(np.random.rand(2), np.random.rand(2,2))

array([0.80764383, 0.76225714])

array([[0.27036834, 0.85678452],
       [0.083802  , 0.64736165]])

In [12]:
# randn
# returns a sample (or samples) from the "standard normal" distribution

display(np.random.randn(2), np.random.randn(5,5))

array([0.97431599, 0.61222468])

array([[-1.37835409, -1.65646371, -1.10385015,  0.45492453,  0.70092519],
       [-0.8129949 ,  1.86516292,  0.92305086,  1.06490496,  0.49650507],
       [ 0.33722707,  0.1904781 ,  0.47382141,  1.29301402,  0.81578696],
       [ 1.42249774, -0.43088155, -1.86314652,  2.10253013, -1.44900436],
       [-0.28452621,  0.62250103,  1.62261002,  1.93176   ,  0.68675902]])

In [13]:
# randint
# returns random integers from low (inclusive) to high (exclusive)

display(np.random.randint(1,100), np.random.randint(1,100,10))

71

array([38, 26, 93, 99, 99, 50, 82, 28, 69, 25])

In [14]:
# normal
# returns a sample (or samples) from the "normal" distribution

np.random.normal(loc = 5, scale = 2)

4.7041134746096995

## B) - Array Methods and Attributes

1. Reshape
2. Array Information
        a. ndim
        b. shape
        c. size
        d. dtype

### 1. Reshape

Returns an array containing the same data with a new shape.

In [15]:
arr = np.arange(25)
display(arr, arr.reshape(5,5))

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

### 2. Array Information

In [57]:
arr = np.array([[ 0,  1,  2,  3,  4],
                [ 5,  6,  7,  8,  9],
                [10, 11, 12, 13, 14]])

display(arr.ndim, arr.shape, arr.reshape(5,3).shape, arr.size, arr.dtype)

2

(3, 5)

(5, 3)

15

dtype('int64')

## C) - Indexing and Selection

1. Bracket indexing and selection
2. Broadcasting
3. Indexing a 2D array
4. Conditional selection

### 1. Bracket indexing and selection

In [73]:
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [70]:
display(arr[0], arr[0:5], arr[5:], arr[-2])

0

array([0, 1, 2, 3, 4])

array([ 5,  6,  7,  8,  9, 10])

9

### 2. Broadcasting

NumPy arrays differ from normal Python lists in their ability to broadcast. With a list, slice assignment only accepts another iterable: for example, you can replace the first 5 elements with a new 5-element list, but assigning a single value such as `lst[:5] = 9999` raises a `TypeError`. With NumPy arrays, you can broadcast a single value across a larger set of values.

In [75]:
part = arr[:5]
part[:] = 9999
part

array([9999, 9999, 9999, 9999, 9999])

In [76]:
arr

array([9999, 9999, 9999, 9999, 9999,    5,    6,    7,    8,    9,   10])

Note that the changes also occur in our original array! This tells us the data is not copied; the slice is a view of the original array. Thus, to get a copy, we need to be explicit.

In [77]:
arr_copy = arr.copy()
arr_copy[:] = 10000

display(arr, arr_copy)

array([9999, 9999, 9999, 9999, 9999,    5,    6,    7,    8,    9,   10])

array([10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000,
       10000, 10000])

### 3. Indexing a 2D array

In [65]:
arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [29]:
# select row

arr_2d[0]

array([ 5, 10, 15])

In [30]:
# a single value

arr_2d[1,0]

20

In [31]:
# slicing

arr_2d[:2,1:]

array([[10, 15],
       [25, 30]])

In [69]:
arr_2d[-2,]

array([20, 25, 30])

### 4. Conditional Selection

In [32]:
arr = np.arange(1,11)
arr>4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [35]:
arr[arr>8]

array([ 9, 10])

## D) - Operations

1. Arithmetic and aggregations
2. Array functions
3. Axis logic
4. Logic comparisons

### 1. Arithmetic and aggregations

Note that operations between arrays are always applied elementwise.

In [44]:
display(arr, arr+arr, arr*arr, arr/arr, arr**3)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

array([  1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

array([   1,    8,   27,   64,  125,  216,  343,  512,  729, 1000])

In [79]:
ranarr = np.random.randint(0,50,10)
display(ranarr, ranarr.max(), ranarr.argmax(), ranarr.min(), ranarr.argmin())

array([ 6, 41, 21, 34, 48, 20, 16, 30, 10, 44])

48

4

6

0

### 2. Array functions

Array functions are essentially just mathematical operations that can be applied across the array.

In [46]:
display(np.sqrt(arr), np.exp(arr), np.sin(arr), np.log(arr))

array([1.        , 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974, 2.64575131, 2.82842712, 3.        , 3.16227766])

array([2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
       1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,
       8.10308393e+03, 2.20264658e+04])

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427,
       -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111])

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509])

### 3. Axis logic

When working with 2-dimensional arrays (matrices) we have to consider rows and columns. This becomes very important when we get to the section on pandas. In array terms, axis 0 refers to rows, and axis 1 refers to columns.

In [87]:
arr_2d = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
arr_2d

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

By passing in axis=0, we're returning an array of sums along the vertical axis, essentially [(1+5+9), (2+6+10), (3+7+11), (4+8+12)]

In [90]:
arr_2d.sum(axis=0)

array([15, 18, 21, 24])

In [89]:
arr_2d.sum(axis=1)

array([10, 26, 42])

### 4. Logic comparisons

In [71]:
a = np.array([1, 2, 3])
b = np.array([5, 4, 3])
a == b

array([False, False,  True])

In [72]:
# If we want to compare the entire arrays, we can use NumPy's built-in function

np.array_equal(a, b)

False

## E) - Other tips and tricks

1. Sort
2. Transposing array
3. Flatten the array
4. Adding or removing elements
5. Combining arrays

### 1. Sort

In [94]:
a = np.array([[2, 4, 8], [1, 13, 7]])
a

array([[ 2,  4,  8],
       [ 1, 13,  7]])

In [95]:
# column-wise

a.sort(axis=0)
a

array([[ 1,  4,  7],
       [ 2, 13,  8]])

In [96]:
# row-wise

a.sort(axis=1)
a

array([[ 1,  4,  7],
       [ 2,  8, 13]])

### 2. Transposing

In [98]:
np.transpose(a)

array([[ 1,  2],
       [ 4,  8],
       [ 7, 13]])

### 3. Flatten the array

In [100]:
a.ravel()

array([ 1,  4,  7,  2,  8, 13])

### 4. Adding or removing elements

In [106]:
a = np.array([1, 2, 3])
b = np.array([5, 4, 3])

np.append(a,b)

array([1, 2, 3, 5, 4, 3])

In [102]:
# Insert the number '5' at index 1 on axis 0

np.insert(a, 1, 5, axis=0)

array([1, 5, 2, 3])

In [115]:
# Delete item at index 1 on axis 0

np.delete(a, 1, axis=0)

array([1, 3])

### 5. Combining arrays

In [122]:
# Concatenate arrays a and b on axis 0

display(np.concatenate((a, b), axis=0))

array([1, 2, 3, 5, 4, 3])

In [119]:
arr_2d

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [124]:
# Combining multi-dimension array, column-wise

display(np.concatenate((arr_2d, arr_2d), axis=1))

array([[ 1,  2,  3,  4,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  5,  6,  7,  8],
       [ 9, 10, 11, 12,  9, 10, 11, 12]])

In [125]:
# Combining multi-dimension array, row-wise

display(np.concatenate((arr_2d, arr_2d), axis=0))

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [127]:
# Combining, vstack, row-wise

np.vstack((arr_2d, arr_2d))

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [128]:
# Combining, hstack, column-wise

np.hstack((arr_2d, arr_2d))

array([[ 1,  2,  3,  4,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  5,  6,  7,  8],
       [ 9, 10, 11, 12,  9, 10, 11, 12]])