In [None]:
### imports
import numpy, pandas
from matplotlib import pyplot
from matplotlib.image import imread

## Data Types and Attributes

#### Array dimensions

<span style="color:navajowhite">
--- importance of array shape<br>
the shapes of all input and output data arrays need to match for a machine learning algorithm to work

--- number of dimensions<br>
one-dimensional array = vector (python list, pandas series)<br>
two-dimensional array = matrix (pandas dataframe)<br>
n-dimensional array (numpy ndarray)

--- axis names<br>
axis-0 = index = rows<br>
axis-1 = columns
</span>

In [None]:
### vector: a one-dimensional array holding three integers
array1d = numpy.array((1, 2, 3))
array1d

In [None]:
### matrix: two rows by three columns (int and float literals are mixed, so items become floats)
array2d = numpy.array(
    [
        [1, 2.0, 3.3],
        [4, 5, 6.5],
    ]
)
array2d

In [None]:
### n-dimensional array: two stacked 3x3 matrices, shape (2, 3, 3)
array3d = numpy.array(
    [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
    ]
)
array3d

#### Attributes

In [None]:
### main datatype of numpy is ndarray (= n-dimensional array)
array1d, array2d, array3d, type(array1d), type(array2d), type(array3d)

In [None]:
### ndim attribute = number of dimensions
array1d, array2d, array3d, array1d.ndim, array2d.ndim, array3d.ndim

In [None]:
### shape attribute = size of dimensions (tuple)
array1d, array2d, array3d, array1d.shape, array2d.shape, array3d.shape

In [None]:
### size attribute = number of all items
array1d, array2d, array3d, array1d.size, array2d.size, array3d.size

In [None]:
### dtype attribute = data type of all items
array1d, array2d, array3d, array1d.dtype, array2d.dtype, array3d.dtype

In [None]:
### creating pandas dataframe from numpy ndarray
pandas.DataFrame(array2d)

## Creating Arrays

#### Array generators

In [None]:
### 2x3 array where every item starts out as 0.0
zeros = numpy.zeros((2, 3))
zeros

In [None]:
### one-dimensional array of three items, each starting out as 1.0
# (3,) is a one-item tuple; a bare (3) is just the integer 3, which numpy accepts as the same shape
ones = numpy.ones(shape=(3,))
ones

In [None]:
### creating array pre-filled with an evenly spaced range (start inclusive, stop exclusive)
# bound to `range_array` instead of `range` so that Python's built-in range()
# is not shadowed for the rest of the notebook
range_array = numpy.arange(start=0, stop=10, step=2)
range_array

In [None]:
### 5x3 array of random floats drawn from the half-open interval [0, 1)
rand = numpy.random.rand(
    5,
    3,
)
rand

In [None]:
### 3x5 array of random integers from 0 up to (but not including) 10
randint = numpy.random.randint(0, 10, size=(3, 5))
randint

#### Pseudo-random numbers

In [None]:
"""
--- concept
pseudo-random numbers are generated by a mathematical formula from an initial number (seed)
when the seed is set in advance, the formula generates the same sequence of "random" numbers

--- usage
to produce a reproducible sequence of "random" numbers
"""

In [None]:
### seeding the generator first makes the "random" 5x3 integer array repeat on every run
numpy.random.seed(99999)
pseudo_array = numpy.random.randint(0, 10, size=(5, 3))
pseudo_array

## Viewing Arrays and Matrices

In [None]:
### finding unique values
pseudo_array, numpy.unique(pseudo_array)

In [None]:
### array > vector indexing
array1[0], array2[0], array3[0]

In [None]:
### array > matrix indexing and slicing
array3[:2, :2, :2]

In [None]:
### indexing practice > keep only the first four numbers of every innermost array
big_array = numpy.random.randint(0, 10, size=(2, 3, 4, 5))
# the ellipsis stands for "all items along every leading axis"
big_array[..., :4]

## Manipulating Arrays

#### Arithmetic

In [None]:
### addition
array1, ones_array, array1 + ones_array

In [None]:
### subtraction
array1, ones_array, array1 - ones_array

In [None]:
### multiplication
array1, ones_array, array1 * ones_array

In [None]:
### power
array2, array2 ** 2

In [None]:
### exponential (e^x, e = Euler's Number = 2.71828183)
array1, numpy.exp(array1)

In [None]:
### natural logarithm
array1, numpy.log(array1)

In [None]:
### division
array2, array1, array2 / array1

In [None]:
### floor division (rounds down result)
array2, array1, array2 // array1

In [None]:
### modulo
array1, array1 % 2

#### Aggregation

In [None]:
"""
--- definition
aggregation = reducing many values to a single summary value by applying the same operation to all of them (sum, mean, min, max)

--- python versus numpy
numpy methods are hundreds of times faster than python functions
python functions are used on python datatypes (sum())
numpy methods are used on numpy arrays (numpy.sum())
"""

In [None]:
### python sum() function and numpy.sum() method
array1, sum(array1), numpy.sum(array1)

In [None]:
### python sum() versus numpy.sum()
massive_array = numpy.random.rand(100000)
%timeit sum(massive_array)
%timeit numpy.sum(massive_array)

In [None]:
### numpy minimum
numpy.min(array2)

In [None]:
### numpy mean
numpy.mean(array2)

In [None]:
### numpy maximum
numpy.max(array2)

In [None]:
### numpy variance
numpy.var(array2)

In [None]:
### numpy standard deviation
numpy.std(array2)

In [None]:
### numpy square root (std = sqrt(var))
numpy.sqrt(numpy.var(array2))

#### Standard Deviation and Variance

In [None]:
### standard deviation concepts
"""
--- resource
https://www.mathsisfun.com/data/standard-deviation.html
"""

In [None]:
### two sample arrays: one with widely spread values, one with tightly grouped values
high_var = numpy.array((1, 100, 200, 300, 4000, 5000))
low_var = numpy.array((2, 4, 6, 8, 10))

In [None]:
### computing variance
numpy.var(high_var), numpy.var(low_var)

In [None]:
### computing standard deviation
numpy.std(high_var), numpy.std(low_var)

In [None]:
### plotting high variance array values
pyplot.hist(high_var)
pyplot.show()

In [None]:
### plotting low variance array values
pyplot.hist(low_var)
pyplot.show()

#### Reshaping arrays

In [None]:
"""
broadcasting happens when doing calculations with differently shaped arrays
smaller array is broadcast (repeated) across larger array
broadcast rules: two dimensions are compatible when they are equal or one of them is 1
broadcastability requires that all corresponding dimensions of two arrays meet broadcast rules
"""

In [None]:
### compatible arrays
array1, array2, array1 * array2

In [None]:
### incompatible arrays > value error
# array2, array3, array2 * array3

In [None]:
### reshaping array2 to be compatible with array3
### array2(2,3) > array2(2,3,1)
array2_reshape = array2.reshape(2, 3, 1)
array2, array2_reshape

In [None]:
### doing arithmetic again
array2_reshape, array3, array2_reshape * array3

#### Transposing arrays

In [None]:
"""
transposing two dimensional arrays swaps axes: rows become columns and columns become rows
transposing more than two dimensions essentially reverses order of dimensions (1,2,3,4) > (4,3,2,1)
"""

In [None]:
### original array
array2, array2.shape

In [None]:
### transposing array
array2_transpose = array2.T
array2_transpose, array2_transpose.shape

In [None]:
### with more than two dimensions, transposing reverses the order of the axes
array4 = numpy.random.randint(1, 10, size=(1, 2, 3, 4))
array4_transpose = array4.transpose()
array4.shape, array4_transpose.shape

#### Dot product

In [None]:
### dot product multiplication concepts
"""
--- syntax
numpy.dot(matrix1, matrix2) (raises value error when inner dimensions do not match)

--- rules
dot product multiplies rows of matrix1 with columns of matrix2, then sums result
column numbers of matrix1 must match row numbers of matrix2
resulting matrix shape is row numbers of matrix1 by column numbers of matrix2

--- example
(x, y) dot (w, z)
inner dimensions must match >>> y must match w
resulting matrix shape takes outer dimensions >>> (x, z)

--- resource
https://www.mathsisfun.com/algebra/matrix-multiplying.html
"""

In [None]:
### two reproducible 5x3 matrices of random integers from 0 to 9
numpy.random.seed(0)
matrix1 = numpy.random.randint(low=0, high=10, size=(5, 3))
matrix2 = numpy.random.randint(low=0, high=10, size=(5, 3))
matrix1, matrix2

In [None]:
### element wise multiplication (Hadamard product)
matrix1 * matrix2

In [None]:
### transposing matrix2
matrix2_transpose = matrix2.T
matrix1.shape, matrix2_transpose.shape

In [None]:
### dot product multiplication
numpy.dot(matrix1, matrix2_transpose)

#### Dot product exercise (nut butter sales)

In [None]:
### reproducible 5x3 array of sales amounts (0 to 19) plus one price per product
numpy.random.seed(0)
sales_amounts = numpy.random.randint(low=0, high=20, size=(5, 3))
sales_prices = numpy.array((10, 8, 12))
sales_amounts, sales_prices

In [None]:
### wrapping the raw arrays in labelled dataframes (weekdays as rows, products as columns)
weekly_sales = pandas.DataFrame(
    data=sales_amounts,
    columns=["Almond butter", "Peanut butter", "Cashew butter"],
    index=["Mon", "Tue", "Wed", "Thu", "Fri"],
)
# the three prices become a single row so they can be used in a dot product
butter_prices = pandas.DataFrame(
    data=sales_prices.reshape(1, 3),
    columns=["Almond butter", "Peanut butter", "Cashew butter"],
    index=["Price"],
)
weekly_sales, butter_prices

In [None]:
### computing total sales (price row dot product with the transposed daily amounts)
# only the product columns take part in the dot product, so re-running this cell
# after "Total ($)" has been added does not break the column/index alignment
product_columns = butter_prices.columns
total_sales = butter_prices.dot(weekly_sales[product_columns].T)
# total_sales has one "Price" row with a column per weekday; selecting that row
# gives a series indexed by weekday, which lines up with the rows of weekly_sales
weekly_sales["Total ($)"] = total_sales.loc["Price"]
weekly_sales

## Comparing Arrays

In [None]:
### concepts
"""
numpy uses the same comparison operators as python
comparison operators are also available as numpy methods
comparison is element-wise and follows the broadcasting rules
results are returned as boolean array whose shape is the broadcast shape of both operands
"""

In [None]:
### comparing array to array
bool_array = array1 <= array2
bool_array, type(bool_array), bool_array.dtype

In [None]:
### comparing array to literal
array1 < 5

## Sorting Arrays

In [None]:
### numpy sort method
"""
sorts values along last (innermost) dimension
returns array with sorted values
"""
randint_array, numpy.sort(randint_array)

In [None]:
### numpy argsort method
"""
sorts values along last (innermost) dimension
returns array with indexes of sorted values
"""
randint_array, numpy.argsort(randint_array)

In [None]:
### numpy argmin method
"""
finds minimum values along last (innermost) dimension
returns array with indexes of minimum values
"""
array1, numpy.argmin(array1)

In [None]:
### numpy argmax method
"""
finds maximum values along last (innermost) dimension
returns array with indexes of maximum values
"""
array1, numpy.argmax(array1)

In [None]:
### axis parameter = 0
"""
numpy sorting method works along axis 0  (rows / vertically)
"""
randint_array, numpy.argmax(randint_array, axis=0)

In [None]:
### axis parameter = 1
"""
numpy sorting method works along axis 1 (columns / horizontally)
"""
randint_array, numpy.argmax(randint_array, axis=1)

## NumPy in Action

<img src="photo-dog.png"/>

In [None]:
### turn image into numpy array
img_array = imread("photo-dog.png")
img_array

In [None]:
### viewing image array attributes
type(img_array), img_array.ndim, img_array.shape, img_array.size

In [None]:
### what is in image array?
"""
image array contains RGB/RGBA values for each pixel
"""
img_array[:3]