In [1]:
import numpy as np

## NumPy Array and its Attributes

In [2]:
# NumPy's main datatype is ndarray (a.k.a. n-dimensional array or multidimensional array)

In [2]:
# 1-D array (vector): a single axis of length 3, so its shape is (3,) rather than (1, 3)

a1 = np.array([1, 2, 3])

a1

array([1, 2, 3])

In [3]:
type(a1)

numpy.ndarray

In [19]:
# 2-D array (matrix): 2 rows x 3 columns -> shape (2, 3)

a2 = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)

a2

array([[1, 2, 3],
       [4, 5, 6]])

In [12]:
# 3-D array: 2 layers, each a 3 x 3 matrix -> shape (2, 3, 3) = layers * rows * columns

a3 = np.array(
    [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
    ]
)

a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

!["Anatomy of an Array"](anatomy.png)

In [25]:
print(f"a1 shape = {a1.shape}")
print(f"a2 shape = {a2.shape}")
print(f"a3 shape = {a3.shape}")

a1 shape = (3,)
a2 shape = (2, 3)
a3 shape = (2, 3, 3)


In [27]:
# no of dimensions
print(f"a1.ndim = {a1.ndim}")
print(f"a2.ndim = {a2.ndim}")
print(f"a3.ndim = {a3.ndim}")

a1.ndim = 1
a2.ndim = 2
a3.ndim = 3


In [30]:
# total num of items in the array

a1.size, a2.size, a3.size

(3, 6, 18)

In [34]:
# check the data types of the items in the matrix

a3.dtype

dtype('int32')

In [41]:
# check the type of the array object itself (as opposed to the dtype of its items)

type(a3)

numpy.ndarray

In [36]:
# Most ML tools are built on top of NumPy
# Data fed into models need to be converted to numbers
# NumPy does that very fast and efficiently because its core is implemented in C
# Machine learning boils down to finding patterns in the numbers within ndarrays 

In [37]:
# Wrap the 2-D NumPy array in a DataFrame; rows and columns receive
# default integer labels because no index/columns are supplied.

import pandas as pd

df = pd.DataFrame(data=a2)

df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## Creating Arrays

In [39]:
# Build an array by hand from a Python list
manual_array = np.array([1, 2, 3])
manual_array

array([1, 2, 3])

In [5]:
# Array of the given shape filled with 1s, stored as 64-bit integers
ones_array = np.ones((2, 3, 3), dtype=np.int64)

ones_array

array([[[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]]], dtype=int64)

In [47]:
# Array of the given shape filled with 0s, stored as 64-bit integers
zeros_array = np.zeros((2, 3, 3), dtype=np.int64)

zeros_array

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=int64)

In [48]:
# arange(stop) counts from 0 up to (but excluding) stop
range_array = np.arange(0, 3)

# arange(start, stop) counts from start up to (but excluding) stop
range_array_2 = np.arange(1, 5)

range_array, range_array_2

(array([0, 1, 2]), array([1, 2, 3, 4]))

In [52]:
# 3 x 3 array of random integers drawn from [1, 10) -- `high` is exclusive
random_array = np.random.randint(1, 10, size=(3, 3))

random_array

array([[6, 7, 8],
       [4, 5, 8],
       [7, 1, 1]])

In [54]:
# 3 x 3 array of random floats drawn from the half-open interval [0.0, 1.0)
random_array_2 = np.random.random((3, 3))

random_array_2

array([[0.88097002, 0.20091965, 0.86287399],
       [0.7111912 , 0.45960057, 0.87420375],
       [0.54625928, 0.38830958, 0.90783139]])

In [55]:
# Fix the global RNG state so the draw below yields the same numbers on every run
np.random.seed(7)

random_array_3 = np.random.randint(1, 5, size=(2, 2))

random_array_3

array([[4, 1],
       [2, 3]])

In [56]:
# Re-seeding with the same value rewinds the RNG, so this draw repeats the previous one
np.random.seed(7)

random_array_4 = np.random.randint(1, 5, size=(2, 2))

random_array_4

array([[4, 1],
       [2, 3]])

In [57]:
# notice how random_array_3 and 4 remain the same
# seed() ensures that random results are reproducible

In [59]:
# find the unique values in a matrix/array

np.unique(ones_array)

array([1], dtype=int64)

## Viewing matrices

### 3D Array

!["3d array"](3d.png)

### 4D Array

!["4d array"](4d.png)

#### After looking at the above examples, we see a pattern here. An n-dimensional array is a collection of n-1 dimensional arrays, for n > 0.

In [None]:
"""
(2,3,4,2,3) = 2_ 4_D arrays, each holding 3_ 3_D arrays, each holding 4_ matrices of 2_ rows and 3_ columns
"""

In [61]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [62]:
a2[0]

array([1, 2, 3])

In [63]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [64]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [6]:
# 4-D array of random integers from [1, 3): 2 blocks x 2 layers x 3 rows x 3 columns
random_array_5 = np.random.randint(1, 3, size=(2, 2, 3, 3))
random_array_5

array([[[[2, 2, 1],
         [2, 2, 2],
         [1, 1, 2]],

        [[1, 1, 1],
         [2, 1, 2],
         [2, 1, 1]]],


       [[[1, 2, 2],
         [1, 2, 2],
         [2, 2, 1]],

        [[1, 1, 1],
         [2, 1, 2],
         [1, 2, 1]]]])

In [75]:
random_array_5.ndim

4

### Slicing

In [7]:
random_array_5

array([[[[2, 2, 1],
         [2, 2, 2],
         [1, 1, 2]],

        [[1, 1, 1],
         [2, 1, 2],
         [2, 1, 1]]],


       [[[1, 2, 2],
         [1, 2, 2],
         [2, 2, 1]],

        [[1, 1, 1],
         [2, 1, 2],
         [1, 2, 1]]]])

In [10]:
random_array_5[:,:,:1,:1]

array([[[[2]],

        [[1]]],


       [[[1]],

        [[1]]]])

In [9]:
random_array_5[:,:1]

array([[[[2, 2, 1],
         [2, 2, 2],
         [1, 1, 2]]],


       [[[1, 2, 2],
         [1, 2, 2],
         [2, 2, 1]]]])

## Manipulating & Comparing Arrays


### Arithmetic


In [11]:
a1

array([1, 2, 3])

In [15]:
# 1-D array of three 1s; np.ones defaults to floats, which is why the
# arithmetic results below are floats too
ones_array = np.ones(shape=3)
ones_array

array([1., 1., 1.])

In [16]:
a1 + ones_array

array([2., 3., 4.])

In [17]:
a1 - ones_array

array([0., 1., 2.])

In [20]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
a1 * a2

array([[ 1,  4,  9],
       [ 4, 10, 18]])

**NOTE**
- **Broadcasting** - how NumPy treats arrays with different shapes during arithmetic operations
- The smaller array is "broadcast" (virtually stretched) across the larger array so that both have compatible shapes; the result takes the broadcast shape
- When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimensions and works its way left.
- Two dimensions are compatible when: they are equal (e.g. the last axis of `a1` with shape `(3,)` and the last axis of `a2` with shape `(2, 3)` are both 3), or one of them is 1
- Read more: [Numpy Docs](https://numpy.org/doc/stable/user/basics.broadcasting.html)

In [23]:
# Rebind a3 to a fresh (2, 3, 3) array of random integers drawn from [1, 5)
a3 = np.random.randint(1, 5, size=(2, 3, 3))
a3

array([[[4, 3, 4],
        [2, 2, 3],
        [3, 2, 4]],

       [[3, 3, 2],
        [1, 4, 4],
        [1, 1, 4]]])

In [24]:
# Shapes (2, 3) and (2, 3, 3) cannot be broadcast: aligned from the right the
# axes pair up as 3<->3 and 2<->3, and the second pair is neither equal nor 1.
# The error is the point of this cell, so catch and display it instead of
# letting it abort a "Restart & Run All".
try:
    a2 * a3
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [30]:
a2.shape, a3.shape

((2, 3), (2, 3, 3))

In [25]:
# Two 1-D ranges of different lengths (4 and 5 items)
a4, a5 = np.arange(4), np.arange(5)

In [26]:
a4

array([0, 1, 2, 3])

In [27]:
a5

array([0, 1, 2, 3, 4])

In [28]:
# Shapes (4,) and (5,) cannot be broadcast: the only axes are 4 and 5, which
# are neither equal nor 1. Catch and display the expected error so the rest of
# the notebook still runs top to bottom.
try:
    a4 * a5
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (4,) (5,) 

In [29]:
a4.shape, a5.shape

((4,), (5,))

**NOTE**
- numpy has methods that perform these arithmetic operations:
    `np.add()`
    `np.square()`
    etc...

### Aggregation

- Performing the same operation on a number of things
- **NOTE:** Use NumPy functions (e.g. `np.sum()`) on NumPy arrays, and Python's built-in functions (e.g. `sum()`) on Python datatypes
- Example:

if `type(a1)` is *ndarray*

`np.sum(a1)`✅

`sum(a1)`❌

In [31]:
a1

array([1, 2, 3])

In [32]:
np.sum(a1)

6

#### some of the aggregation fns for numpy:
    
`np.mean()`

`np.min()` & `np.max()`

`np.std()` = standard deviation

`np.var()` = variance

In [35]:
"""
variance - measure of the average degree to which each number is different
to the mean

standard deviation - a measure of how spread out a group of numbers is from the
mean

both are measures of the spread of data

"""

# Two five-item samples: one with widely scattered values, one evenly and tightly spaced
high_var_array = np.array([1, 100, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [34]:
np.var(high_var_array), np.var(low_var_array)

(4684848.16, 8.0)

In [36]:
np.std(high_var_array), np.std(low_var_array)

(2164.451006606525, 2.8284271247461903)

In [37]:
np.mean(high_var_array), np.mean(low_var_array)

(1880.2, 6.0)

### Reshaping & Transposing

- `reshape()` - used to modify an existing array to other custom shapes
- `T` - (stands for transpose) used to swap (reverse the order of) the axes

In [38]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [39]:
a3

array([[[4, 3, 4],
        [2, 2, 3],
        [3, 2, 4]],

       [[3, 3, 2],
        [1, 4, 4],
        [1, 1, 4]]])

In [41]:
# Append a trailing axis of length 1: (2, 3) -> (2, 3, 1), which lets the
# result broadcast against a (2, 3, 3) array
a2_reshaped = a2.reshape((2, 3, 1))
a2_reshaped

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]])

In [45]:
# Element-wise product; the length-1 last axis of a2_reshaped is stretched
# across the 3 columns of a3
result = a3 * a2_reshaped
result

array([[[ 4,  3,  4],
        [ 4,  4,  6],
        [ 9,  6, 12]],

       [[12, 12,  8],
        [ 5, 20, 20],
        [ 6,  6, 24]]])

In [46]:
result.shape

(2, 3, 3)

In [48]:
result.T

array([[[ 4, 12],
        [ 4,  5],
        [ 9,  6]],

       [[ 3, 12],
        [ 4, 20],
        [ 6,  6]],

       [[ 4,  8],
        [ 6, 20],
        [12, 24]]])

In [49]:
result.T.shape

(3, 3, 2)

### Dot Product

- So far we have been using the multiply (\*) operator, which performs **element-wise** multiplication, also known as the **Hadamard product**

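- `dot()` works by multiplying the rows of the first array by the columns of the second and summing the products, so the inner dimensions must match: `(m, n) · (n, p) -> (m, p)`. See image below for visualization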

- [Interactive demo](http://matrixmultiplication.xyz/)

- this is yet another tool we will use to help our model find patterns in the numbers

![](dot-product.jpg)

In [57]:
# Seed once, then draw two 5 x 3 integer arrays back to back so both are reproducible
np.random.seed(0)
a5 = np.random.randint(4, 12, size=(5, 3))
a6 = np.random.randint(2, 10, size=(5, 3))

In [58]:
a5

array([[ 8, 11,  9],
       [ 4,  7,  7],
       [ 7, 11,  5],
       [ 7,  9,  6],
       [ 8, 11, 10]])

In [59]:
a6

array([[2, 2, 6],
       [4, 3, 8],
       [9, 9, 8],
       [2, 3, 7],
       [3, 7, 2]])

In [60]:
# A dot product needs matching inner dimensions; (5, 3) . (5, 3) pairs 3 with 5,
# so NumPy raises. Catch and display the expected error so the notebook can
# still be run from top to bottom.
try:
    np.dot(a5, a6)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

In [61]:
# Flip a5 from (5, 3) to (3, 5) so its inner dimension lines up with a6's 5 rows
a5_transposed = a5.transpose()
a5_transposed

array([[ 8,  4,  7,  7,  8],
       [11,  7, 11,  9, 11],
       [ 9,  7,  5,  6, 10]])

In [62]:
a5_transposed.shape

(3, 5)

In [63]:
np.dot(a5_transposed, a6)

array([[133, 168, 201],
       [200, 246, 295],
       [133, 172, 212]])

In [65]:
a6.dot(a5_transposed)

array([[ 92,  64,  66,  68,  98],
       [137,  93, 101, 103, 145],
       [243, 155, 202, 192, 251],
       [112,  78,  82,  83, 119],
       [119,  75, 108,  96, 121]])

In [66]:
np.dot(a6, a5_transposed)

array([[ 92,  64,  66,  68,  98],
       [137,  93, 101, 103, 145],
       [243, 155, 202, 192, 251],
       [112,  78,  82,  83, 119],
       [119,  75, 108,  96, 121]])

## Sales Example

In [72]:
# Weekly sales data: 5 rows x 3 columns of random unit counts in [0, 20),
# seeded so the figures are the same on every run

np.random.seed(0)
sales_array = np.random.randint(low=0, high=20, size=(5, 3))
sales_array

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [68]:
import pandas as pd

In [73]:
# create weekly sales dataframe: one row per weekday, one column per product

days = ["Mon", "Tue", "Wed", "Thur", "Fri"]
products = ["Almond Butter", "Peanut Butter", "Cashew Butter"]
# Label the rows with the weekday names defined just above
sales_df = pd.DataFrame(sales_array, index=days, columns=products)
sales_df

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thur,4,6,12
Fri,1,6,7


In [74]:
# Unit price per product, in the same order as the product columns
# (Almond Butter, Peanut Butter, Cashew Butter)

prices = np.array([10, 8, 12])
prices

array([10,  8, 12])

In [82]:
# Product prices as a one-row DataFrame: the 1-D prices array is reshaped to
# (1, 3) so it forms a single "Price" row with one column per product

prices_df = pd.DataFrame(
    data=prices.reshape((1, 3)),
    index=["Price"],
    columns=products,
)
prices_df

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [83]:
sales_array.shape, prices.shape

((5, 3), (3,))

In [86]:
# Total sales per day: (5, 3) units sold . (3,) unit prices -> (5,) daily totals

total = np.dot(sales_array, prices)
total

array([240, 138, 458, 232, 142])

In [87]:
# Add the daily totals as a new column. `total` is already 1-D with one value
# per row, so it is assigned directly -- no hard-coded reshape(5, 1) needed,
# and the cell keeps working if the number of days changes.

sales_df["Total"] = total
sales_df

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thur,4,6,12,232
Fri,1,6,7,142


### Comparison Operators

In [88]:
a1

array([1, 2, 3])

In [89]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [90]:
a1 >= a2

array([[ True,  True,  True],
       [False, False, False]])

## Sorting Arrays

In [93]:
a5

array([[ 8, 11,  9],
       [ 4,  7,  7],
       [ 7, 11,  5],
       [ 7,  9,  6],
       [ 8, 11, 10]])

In [94]:
# Sort along the last axis (the default), i.e. each row is sorted independently
a5_sorted = np.sort(a5, axis=-1)
a5_sorted

array([[ 8,  9, 11],
       [ 4,  7,  7],
       [ 5,  7, 11],
       [ 6,  7,  9],
       [ 8, 10, 11]])

In [95]:
# get the indices that would sort each row (the positions, not the sorted values)

np.argsort(a5)

array([[0, 2, 1],
       [0, 1, 2],
       [2, 0, 1],
       [2, 0, 1],
       [0, 2, 1]], dtype=int64)

In [97]:
# get the index of the max value along an axis (axis=0 -> one index per column)

np.argmax(a5, axis=0)

array([0, 0, 4], dtype=int64)

In [98]:
np.argmax(a5, axis=1)

array([1, 1, 1, 1, 1], dtype=int64)

## Converting Images to Arrays

In [99]:
from matplotlib.image import imread

![girl-image](girl.jpg)

In [107]:
# Read the image file (relative path) into an array of pixel values
girl_array = imread("girl.jpg")
# Display only the first slice along axis 0 instead of dumping the whole array
girl_array[:1,]

array([[[187, 165, 152],
        [184, 166, 154],
        [175, 163, 149],
        ...,
        [250, 247, 240],
        [250, 247, 240],
        [250, 247, 240]]], dtype=uint8)

In [101]:
girl_array.shape, girl_array.size, girl_array.ndim

((3000, 2000, 3), 18000000, 3)

## More Resources

[Visual Numpy](http://jalammar.github.io/visual-numpy/)