# NumPy DataTypes and Attributes

In [1]:
import numpy as np

## NumPy's main datatype is ndarray

In [2]:
a1 = np.array([1,2,3])
a1

array([1, 2, 3])

In [3]:
type(a1)

numpy.ndarray

In [4]:
# 2-D array: two rows of three values. Mixing ints and floats in the literal
# makes NumPy pick a floating-point dtype for the whole array.
a2 = np.array([[1.8, 2, 3.6],
               [4.0, 5, 6.1]])

a2

array([[1.8, 2. , 3.6],
       [4. , 5. , 6.1]])

In [5]:
# 3-D array: two 3x3 blocks, stored explicitly as 16-bit integers.
a3 = np.array(
    [
        [[1, 2, 3], [3, 4, 5], [6, 7, 8]],
        [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
    ],
    dtype="int16",
)
a3

array([[[ 1,  2,  3],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]], dtype=int16)

In [6]:
print(a1.shape)
print(a2.shape)
print(a3.shape)

(3,)
(2, 3)
(2, 3, 3)


In [7]:
# Number of dimensions (axes) of each array, shown together as one tuple.
(a1.ndim, a2.ndim, a3.ndim)

(1, 2, 3)

In [8]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int32'), dtype('float64'), dtype('int16'))

In [9]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [10]:
type(a1), type(a2), type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

## Create a DataFrame from a NumPy Array

In [11]:
import pandas as pd

In [12]:
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.8,2.0,3.6
1,4.0,5.0,6.1


# Creating NumPy Arrays

In [13]:
ones = np.ones(2)
ones

array([1., 1.])

In [14]:
ones.dtype

dtype('float64')

In [15]:
# A 2x3 array of ones, stored as 16-bit integers instead of the default float.
ones = np.ones((2, 3), dtype="int16")
ones

array([[1, 1, 1],
       [1, 1, 1]], dtype=int16)

In [16]:
# A 2x3 array of zeros with the default floating-point dtype.
zeros = np.zeros(shape=(2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [17]:
# Even numbers from 0 up to (but not including) 10: start, stop, step.
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [18]:
random_array = np.random.randint(low=0, high=10, size=(3,5))
random_array

array([[8, 4, 8, 2, 0],
       [3, 1, 3, 4, 9],
       [6, 1, 3, 1, 9]])

In [19]:
np.random.random(size=(5,5))

array([[2.38047392e-01, 4.07434857e-01, 2.07674950e-01, 6.10571852e-01,
        7.03649778e-01],
       [9.62239418e-02, 8.44939712e-01, 3.16911842e-01, 5.93586548e-01,
        4.51211730e-01],
       [9.42980795e-01, 6.75526570e-01, 4.65206461e-04, 3.25755291e-01,
        5.03612818e-01],
       [4.66859606e-01, 3.39722029e-01, 7.28337005e-01, 8.60582485e-01,
        9.42055888e-01],
       [1.83767408e-01, 2.16894786e-01, 3.55144814e-01, 9.69633366e-01,
        9.71370198e-01]])

In [20]:
np.random.rand(2,3)

array([[0.13229133, 0.34733151, 0.75000959],
       [0.82564904, 0.48107554, 0.82965057]])

# NumPy Random Seed

## Pseudo-random numbers

In [21]:
np.random.seed(seed=0)
np.random.randint(10, size=(5,3))

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [22]:
np.random.seed(seed=0)
np.random.randint(10, size=(5,3))

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [23]:
np.random.randint(10, size=(5,3))

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]])

In [24]:
np.random.seed(5)
np.random.random((5, 3)) 

array([[0.22199317, 0.87073231, 0.20671916],
       [0.91861091, 0.48841119, 0.61174386],
       [0.76590786, 0.51841799, 0.2968005 ],
       [0.18772123, 0.08074127, 0.7384403 ],
       [0.44130922, 0.15830987, 0.87993703]])

# Viewing Arrays and Matrices

In [25]:
random_array = np.random.randint(0,10,(3,3))
random_array

array([[4, 6, 2],
       [9, 9, 9],
       [9, 1, 2]])

In [26]:
np.unique(random_array)

array([1, 2, 4, 6, 9])

In [27]:
a1[0]

1

In [28]:
a2[0]

array([1.8, 2. , 3.6])

In [29]:
a3

array([[[ 1,  2,  3],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]], dtype=int16)

In [30]:
a3[0]

array([[1, 2, 3],
       [3, 4, 5],
       [6, 7, 8]], dtype=int16)

In [31]:
a3[:2,:2,:2]

array([[[ 1,  2],
        [ 3,  4]],

       [[10, 11],
        [13, 14]]], dtype=int16)

In [32]:
a3[:,:,:2]

array([[[ 1,  2],
        [ 3,  4],
        [ 6,  7]],

       [[10, 11],
        [13, 14],
        [16, 17]]], dtype=int16)

# Manipulating Arrays

## Arithmetic

In [33]:
a1

array([1, 2, 3])

In [34]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [35]:
a1 + ones

array([2., 3., 4.])

In [36]:
a1 - ones

array([0., 1., 2.])

In [37]:
a1 * ones

array([1., 2., 3.])

In [38]:
a1

array([1, 2, 3])

In [39]:
a2

array([[1.8, 2. , 3.6],
       [4. , 5. , 6.1]])

In [40]:
a1 * a2

array([[ 1.8,  4. , 10.8],
       [ 4. , 10. , 18.3]])

In [41]:
a1.shape, a2.shape

((3,), (2, 3))

In [42]:
# Broadcasting aligns shapes starting from the trailing (right-most) dimension:
# a2: (2, 3)
# a1: ( , 3)  <- the missing leading dimension is stretched to match a2

In [43]:
a3*a1

array([[[ 1,  4,  9],
        [ 3,  8, 15],
        [ 6, 14, 24]],

       [[10, 22, 36],
        [13, 28, 45],
        [16, 34, 54]]])

In [44]:
a3

array([[[ 1,  2,  3],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]], dtype=int16)

In [45]:
a4 = np.ones(shape=(3,3))*2
a4

array([[2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])

In [46]:
a3*a4

array([[[ 2.,  4.,  6.],
        [ 6.,  8., 10.],
        [12., 14., 16.]],

       [[20., 22., 24.],
        [26., 28., 30.],
        [32., 34., 36.]]])

In [47]:
a3.shape, a4.shape

((2, 3, 3), (3, 3))

In [48]:
# Same broadcasting rule with a 3-D operand; shapes align from the right:
# a3: (2, 3, 3)
# a4: ( , 3, 3)  <- the (3, 3) array is applied to each of the 2 leading blocks

In [49]:
print(a1)
print(ones)
a1/ones

[1 2 3]
[1. 1. 1.]


array([1., 2., 3.])

In [50]:
print(a2)
print(a1)
# True division (/) keeps the fractional part; floor division (//) rounds
# each quotient down to the nearest whole number.
print(a2 / a1)
print(a2 // a1)

[[1.8 2.  3.6]
 [4.  5.  6.1]]
[1 2 3]
[[1.8        1.         1.2       ]
 [4.         2.5        2.03333333]]
[[1. 1. 1.]
 [4. 2. 2.]]


In [51]:
print(a1)
print(a1 ** 2)
print(np.square(a1))

[1 2 3]
[1 4 9]
[1 4 9]


In [52]:
np.add(a1, ones)

array([2., 3., 4.])

In [53]:
a1 % 2

array([1, 0, 1], dtype=int32)

In [54]:
np.exp(a1)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [55]:
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

## Aggregation
### Performing the same operation on a number of things

In [56]:
listy_list = [1, 2, 3]
print(type(listy_list))
print(sum(listy_list))

<class 'list'>
6


In [57]:
type(a1)

numpy.ndarray

In [58]:
sum(a1), np.sum(a1)

(6, 6)

### Use Python's built-in functions (`sum()`) on Python datatypes
### Use NumPy's functions (`np.sum()`) on NumPy arrays

In [59]:
massive_array = np.random.random(100000)
massive_array[:5]

array([0.01458075, 0.09336303, 0.82655425, 0.83349274, 0.89241102])

In [60]:
%timeit sum(massive_array)
%timeit np.sum(massive_array)

43.5 ms ± 7.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
109 µs ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [61]:
np.mean(a2), np.std(a2),np.sqrt(np.var(a2)), np.var(a2), np.square(np.std(a2)) 

(3.75,
 1.5294334463018213,
 1.5294334463018213,
 2.339166666666666,
 2.339166666666666)

In [62]:
np.max(a2), np.min(a2)

(6.1, 1.8)

# Standard Deviation and Variance

In [63]:
high_var_array = [1, 100, 200, 300, 4000, 5000]
low_var_array = [2, 4, 6, 8, 10]

In [64]:
np.mean(high_var_array), np.mean(low_var_array)

(1600.1666666666667, 6.0)

In [65]:
np.var(high_var_array), np.var(low_var_array)

(4296133.472222221, 8.0)

In [66]:
np.std(high_var_array), np.std(low_var_array)

(2072.711623024829, 2.8284271247461903)

In [67]:
# Recompute the statistics by hand to confirm what np.mean, np.var and
# np.std return: variance is the mean squared deviation from the mean, and
# the standard deviation is its square root.
low_var_values = np.asarray(low_var_array)
mean = low_var_values.sum() / low_var_values.size
var = np.square(low_var_values - mean).sum() / low_var_values.size
std = np.sqrt(var)

mean, var, std

(6.0, 8.0, 2.8284271247461903)

# Reshape and Transpose

In [68]:
a2

array([[1.8, 2. , 3.6],
       [4. , 5. , 6.1]])

In [69]:
a2.reshape(1,-1)

array([[1.8, 2. , 3.6, 4. , 5. , 6.1]])

In [70]:
a2.shape

(2, 3)

In [71]:
a2_reshape = a2.reshape(2,3,1)
a2_reshape, a2_reshape.shape

(array([[[1.8],
         [2. ],
         [3.6]],
 
        [[4. ],
         [5. ],
         [6.1]]]),
 (2, 3, 1))

In [72]:
a3, a3.shape

(array([[[ 1,  2,  3],
         [ 3,  4,  5],
         [ 6,  7,  8]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]], dtype=int16),
 (2, 3, 3))

In [73]:
a3 * a2_reshape

array([[[  1.8,   3.6,   5.4],
        [  6. ,   8. ,  10. ],
        [ 21.6,  25.2,  28.8]],

       [[ 40. ,  44. ,  48. ],
        [ 65. ,  70. ,  75. ],
        [ 97.6, 103.7, 109.8]]])

In [74]:
a2, a2.shape

(array([[1.8, 2. , 3.6],
        [4. , 5. , 6.1]]),
 (2, 3))

In [75]:
a2.T, a2.T.shape

(array([[1.8, 4. ],
        [2. , 5. ],
        [3.6, 6.1]]),
 (3, 2))

# Dot Product vs Element-Wise

In [76]:
# Seed the global generator so both matrices are the same on every run.
np.random.seed(0)
mat1 = np.random.randint(0, 10, (5, 3))
mat2 = np.random.randint(0, 10, (5, 3))

# Element-wise multiplication (Hadamard product); equivalent to mat1 * mat2.
mat1, mat2, np.multiply(mat1, mat2)

(array([[5, 0, 3],
        [3, 7, 9],
        [3, 5, 2],
        [4, 7, 6],
        [8, 8, 1]]),
 array([[6, 7, 7],
        [8, 1, 5],
        [9, 8, 9],
        [4, 3, 0],
        [3, 5, 0]]),
 array([[30,  0, 21],
        [24,  7, 45],
        [27, 40, 18],
        [16, 21,  0],
        [24, 40,  0]]))

In [77]:
(mat1.shape, mat2.T.shape), (mat1.T.shape, mat2.shape)

(((5, 3), (3, 5)), ((3, 5), (5, 3)))

In [78]:
# Dot product: the inner dimensions must match, so one operand is transposed.
# (5, 3) . (3, 5) -> (5, 5)   and   (3, 5) . (5, 3) -> (3, 3)
mat1.dot(mat2.T), mat1.T.dot(mat2)

(array([[ 51,  55,  72,  20,  15],
        [130,  76, 164,  33,  44],
        [ 67,  39,  85,  27,  34],
        [115,  69, 146,  37,  47],
        [111,  77, 145,  56,  64]]),
 array([[121, 114,  77],
        [153, 108,  80],
        [135,  69,  84]]))

# Exercise: Nut Butter Store Sales

In [79]:
'''
day      | almond butter | cashew butter | peanut butter | daily sales
________________________________________________________________
Monday   |        2              7               1           88
Tuesday  |        9              4              16          314
Wednesday|       11             14              18          438
Thursday |       13             13              16          426
Friday   |       15             18               9          402


   _    | almond butter | cashew butter | peanut butter
_______________________________________________________
price          10               8               12
'''

'\nday      | almond butter | cashew butter | peanut butter | daily sales\n________________________________________________________________\nMonday   |        2              7               1           88\nTuesday  |        9              4              16          314\nWednesday|       11             14              18          438\nThursday |       13             13              16          426\nFriday   |       15             18               9          402\n\n\n   _    | almond butter | cashew butter | peanut butter\n_______________________________________________________\nprice          10               8               12\n'

In [80]:
# Units sold per weekday, built column by column (one list per product).
Nut_Butter_Store_Sales = pd.DataFrame(
    {
        'almond butter': [2, 9, 11, 13, 15],
        'cashew butter': [7, 4, 14, 13, 18],
        'peanut butter': [1, 16, 18, 16, 9],
    },
    index=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
)

Nut_Butter_Store_Sales

Unnamed: 0,almond butter,cashew butter,peanut butter
Monday,2,7,1
Tuesday,9,4,16
Wednesday,11,14,18
Thursday,13,13,16
Friday,15,18,9


In [81]:
# Unit prices as a single-row frame: the nested literal gives shape (1, 3),
# one column per product.
Nut_Butter_Price = pd.DataFrame(
    np.array([[10, 8, 12]]),
    columns=['almond butter', 'cashew butter', 'peanut butter'],
    index=['price'],
)

Nut_Butter_Price

Unnamed: 0,almond butter,cashew butter,peanut butter
price,10,8,12


In [82]:
# Revenue per day = units sold (5 x 3) dotted with price per unit (3 x 1).
# The product columns are selected by name, taken from the price table, so
# that units and prices are aligned column-for-column. This also keeps the
# cell re-runnable: once a 'daily sales' column has been added to
# Nut_Butter_Store_Sales, taking `.values` of the whole frame would be (5, 4)
# and np.dot would fail with a shape mismatch.
product_columns = Nut_Butter_Price.columns
daily_sales = np.dot(
    Nut_Butter_Store_Sales[product_columns].values,
    Nut_Butter_Price.values.T,
)
daily_sales

array([[ 88],
       [314],
       [438],
       [426],
       [402]], dtype=int64)

In [83]:
# Attach the per-day totals as a new 'daily sales' column. Note that this
# assignment mutates Nut_Butter_Store_Sales in place.
Nut_Butter_Store_Sales['daily sales'] = daily_sales

Nut_Butter_Store_Sales

Unnamed: 0,almond butter,cashew butter,peanut butter,daily sales
Monday,2,7,1,88
Tuesday,9,4,16,314
Wednesday,11,14,18,438
Thursday,13,13,16,426
Friday,15,18,9,402


# Comparison Operators

In [84]:
a1, a2

(array([1, 2, 3]),
 array([[1.8, 2. , 3.6],
        [4. , 5. , 6.1]]))

In [85]:
(a1 > a2), (a1 > a2).sum() 

(array([[False, False, False],
        [False, False, False]]),
 0)

In [86]:
(a1 >= a2), (a1 >= a2).sum() 

(array([[False,  True, False],
        [False, False, False]]),
 1)

In [87]:
a1 > 1

array([False,  True,  True])

In [88]:
a1 == 1

array([ True, False, False])

In [89]:
a1 == a2

array([[False,  True, False],
       [False, False, False]])

# Sorting Arrays

In [90]:
random_array = np.random.randint(10,size=(3,5))
random_array

array([[2, 3, 8, 1, 3],
       [3, 3, 7, 0, 1],
       [9, 9, 0, 4, 7]])

In [91]:
np.sort(random_array, axis=0)

array([[2, 3, 0, 0, 1],
       [3, 3, 7, 1, 3],
       [9, 9, 8, 4, 7]])

In [92]:
np.sort(random_array, axis=1)

array([[1, 2, 3, 3, 8],
       [0, 1, 3, 3, 7],
       [0, 4, 7, 9, 9]])

In [93]:
random_array ,np.argsort(random_array)

(array([[2, 3, 8, 1, 3],
        [3, 3, 7, 0, 1],
        [9, 9, 0, 4, 7]]),
 array([[3, 0, 1, 4, 2],
        [3, 4, 0, 1, 2],
        [2, 3, 4, 0, 1]], dtype=int64))

In [94]:
a1

array([1, 2, 3])

In [95]:
a1.argsort()

array([0, 1, 2], dtype=int64)

In [96]:
a1.argmin()

0

In [97]:
a1.argmax()

2

In [98]:
random_array.argmax(axis=0)

array([2, 2, 0, 2, 2], dtype=int64)

# Turn Images Into NumPy Arrays

<img src='panda.png'/>

In [99]:
from matplotlib.image import imread

In [103]:
panda = imread('panda.png')
panda.shape, type(panda)

((2330, 3500, 3), numpy.ndarray)

In [105]:
panda[:1]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]]], dtype=float32)