In [99]:
import numpy as np

## DataTypes & Attributes

In [100]:
# NumPy's main datatype is ndarray
a1 = np.array([1,2,3])
a1

array([1, 2, 3])

In [101]:
type(a1)

numpy.ndarray

In [102]:
# 2-D array: mixing ints and floats upcasts every element to float64
a2 = np.array([
    [1, 2.0, 3.3],
    [4, 5, 6.5],
])

# 3-D array: two 3x3 integer matrices stacked together -> shape (2, 3, 3)
a3 = np.array([
    [[1, 2, 3], [2, 4, 5], [6, 7, 8]],
    [[10, 11, 12], [13, 15, 15], [16, 17, 18]],
])

In [103]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [104]:
a3

array([[[ 1,  2,  3],
        [ 2,  4,  5],
        [ 6,  7,  8]],

       [[10, 11, 12],
        [13, 15, 15],
        [16, 17, 18]]])

In [105]:
a1.shape

(3,)

In [106]:
a2.shape

(2, 3)

In [107]:
a3.shape

(2, 3, 3)

In [108]:
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [109]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int32'), dtype('float64'), dtype('int32'))

In [110]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [111]:
# Create a DataFrame from a NumPy array
import pandas as pd

df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## Creating Arrays

In [112]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [113]:
# SHIFT+TAB (inside the parentheses) = shows the docstring
ones = np.ones((2,3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [114]:
zeros = np.zeros((2,3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [115]:
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [116]:
random_array = np.random.randint(0, 10, size=(3,5))
random_array

array([[7, 8, 1, 5, 9],
       [8, 9, 4, 3, 0],
       [3, 5, 0, 2, 3]])

In [117]:
random_array_2 = np.random.random((5, 3))
random_array_2

array([[0.79915856, 0.46147936, 0.78052918],
       [0.11827443, 0.63992102, 0.14335329],
       [0.94466892, 0.52184832, 0.41466194],
       [0.26455561, 0.77423369, 0.45615033],
       [0.56843395, 0.0187898 , 0.6176355 ]])

In [118]:
random_array_2 = np.random.rand(5, 3)
random_array_2

array([[0.61209572, 0.616934  , 0.94374808],
       [0.6818203 , 0.3595079 , 0.43703195],
       [0.6976312 , 0.06022547, 0.66676672],
       [0.67063787, 0.21038256, 0.1289263 ],
       [0.31542835, 0.36371077, 0.57019677]])

In [119]:
# Pseudo-random numbers: seeding the generator fixes the sequence, so every
# re-run produces the same "random" values. Calling seed() with no argument
# reseeds from fresh OS entropy instead, which makes results irreproducible.
np.random.seed(0)

## Manipulating & Comparing Arrays

### Arithmetic

In [120]:
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [121]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [122]:
a1 + ones

array([2., 3., 4.])

In [123]:
a1 - ones

array([0., 1., 2.])

In [124]:
a1 * ones

array([1., 2., 3.])

In [125]:
a2 = np.array([[1. , 2. , 3.3],
               [4. , 5. , 6.5]])
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [126]:
a1 * a2

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [127]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [128]:
a3

array([[[ 1,  2,  3],
        [ 2,  4,  5],
        [ 6,  7,  8]],

       [[10, 11, 12],
        [13, 15, 15],
        [16, 17, 18]]])

In [129]:
# a2 has shape (2, 3) and a3 has shape (2, 3, 3). Shapes are compared from the
# right: 3 vs 3 matches, but a2's 2 cannot be matched against a3's middle 3,
# so NumPy refuses to broadcast. Catch the error and show it so this
# demonstration doesn't abort a "Restart & Run All".
try:
    a2 * a3
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [None]:
a1 / ones

In [None]:
a2 / a1

In [None]:
# Floor division rounds the result of the division down to the nearest whole number
a2 // a1

In [None]:
# a2 squared (in Python the power operator is **, not ^)
a2 ** 2 

In [None]:
np.square(a2)

In [None]:
np.add(a1, ones)

In [None]:
a1 % 2

In [None]:
a2 % 2

### Aggregation 
#### Aggregation = performing the same operation on a number of things

In [None]:
listy_list = [1, 2, 3]
type(listy_list)

In [None]:
sum(listy_list)

In [None]:
a1

In [None]:
sum(a1)

In [None]:
np.sum(a1)

Use Python's built-in functions (e.g. `sum()`) on Python datatypes and NumPy's functions (e.g. `np.sum()`) on NumPy arrays.

In [None]:
# Create a massive NumPy array
massive_array = np.random.random(100000)
massive_array.size

In [None]:
massive_array[:10]

In [None]:
%timeit sum(massive_array) # Python's sum()
%timeit np.sum(massive_array) # NumPy's np.sum()

In [None]:
a2

In [None]:
np.mean(a2)

In [None]:
np.max(a2)

In [None]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

In [None]:
# Variance = a measure of the average degree to which each number differs from the mean
# Higher variance = wider range of numbers
# Lower variance = narrower range of numbers
np.var(a2)

In [None]:
# Standard Deviation = Square Root of Variance
np.sqrt(np.var(a2))

In [None]:
# Demo of std and var
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [None]:
np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(high_var_array)
plt.show()

In [None]:
plt.hist(low_var_array)
plt.show()

## Reshaping and Transposing

In [None]:
a2

In [None]:
a2.shape

In [None]:
a3

In [None]:
a3.shape

In [None]:
# Transpose = switches the axes
a2.T

In [None]:
a2.T.shape

## Dot Product

In [None]:
# Seed the global generator so both matrices are identical on every run
np.random.seed(0)

# Two 5x3 matrices of integers drawn from [0, 10), generated in this order
mat1 = np.random.randint(low=0, high=10, size=(5, 3))
mat2 = np.random.randint(low=0, high=10, size=(5, 3))

mat1

In [None]:
mat2

In [None]:
mat1.shape, mat2.shape

In [None]:
# Element-wise multiplication (Hadamard product)
mat1 * mat2

In [None]:
# Dot product
# np.dot(mat1, mat2) # doesn't work because the inner dimensions don't match (dot product rule)

In [None]:
# Transpose mat2 (the matrix the dot product below actually transposes):
# its shape goes from (5, 3) to (3, 5), so its first dimension lines up
# with mat1's 3 columns
mat2.T

In [None]:
mat1.shape, mat2.T.shape # Inner dimensions now match

In [None]:
# Dot product of mat1 with the transposed mat2
mat3 = mat1.dot(mat2.T)
mat3

In [None]:
mat3.shape

## Dot product example (Nut Butter sales)

In [None]:
# Re-seed so the generated sales figures are the same on every run
np.random.seed(0)

# Number of jars sold: 5 rows x 3 columns of integers drawn from [0, 20)
sales_amounts = np.random.randint(low=0, high=20, size=(5, 3))
sales_amounts

In [136]:
# Create weekly_sales DataFrame: label the raw sales matrix with one row per
# weekday and one column per nut butter
weekly_sales = pd.DataFrame(
    data=sales_amounts,
    index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
    columns=["Almond butter", "Peanut butter", "Cashew butter"],
)
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [138]:
weekly_sales.shape

(5, 3)

In [None]:
# Create prices array
prices = np.array([10, 8, 12])
prices

In [None]:
prices.shape

In [None]:
# Create butter_prices DataFrame: a single "Price" row with one column per nut butter
price_row = prices.reshape((1, 3))  # 1-D (3,) -> 2-D (1, 3) so it forms one row
butter_prices = pd.DataFrame(
    data=price_row,
    index=["Price"],
    columns=["Almond butter", "Peanut butter", "Cashew butter"],
)
butter_prices

In [None]:
sales_amounts.shape

In [None]:
# total_sales = prices.dot(sales_amounts) # doesn't work because the inner dimensions don't match for the dot product

In [None]:
sales_amounts.T

In [130]:
# Shapes aren't aligned --> transpose
total_sales = prices.dot(sales_amounts.T)
total_sales

array([240, 138, 458, 232, 142])

In [139]:
# Check the shapes before creating daily_sales
butter_prices.shape, weekly_sales.shape

((1, 3), (5, 3))

In [140]:
# Transpose weekly_sales so that the '3' becomes the inner dimension
weekly_sales.T.shape

(3, 5)

In [142]:
# Multiply the prices (1 x 3) by the transposed jar counts (3 x 5) to get the
# takings for each day. Selecting only the columns named in butter_prices keeps
# this cell re-runnable after a "Total ($)" column has been added to
# weekly_sales; otherwise the transposed frame gains an extra row and the dot
# product no longer aligns.
daily_sales = butter_prices.dot(weekly_sales[butter_prices.columns].T)
daily_sales

Unnamed: 0,Mon,Tues,Wed,Thurs,Fri
Price,240,138,458,232,142


In [143]:
weekly_sales.shape

(5, 3)

In [144]:
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [None]:
# Doesn't work: daily_sales has shape (1, 5), but a new column needs one value per row, i.e. shape (5, 1)
# weekly_sales["Total ($)"] = daily_sales
# weekly_sales

In [145]:
# daily_sales is (1, 5) with the weekdays as columns; transposing it gives a
# (5, 1) frame indexed by weekday, which lines up with the rows of weekly_sales
weekly_sales["Total ($)"] = daily_sales.T
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter,Total ($)
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142
