In [3]:
import numpy as np

## Numpy Data types and attributes

In [4]:
a_1 = np.array([1, 2, 3])
a_1

array([1, 2, 3])

In [5]:
a_2 = np.array([[1.0, 2.5, 3], [4, 5.7, 6.9]])
a_2

array([[1. , 2.5, 3. ],
       [4. , 5.7, 6.9]])

In [6]:
a_3 = np.array([[[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]],
                [[10, 11, 12],
                 [13, 14, 15],
                 [16, 17, 18]]])
a_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [7]:
a_1.shape

(3,)

In [8]:
a_2.shape

(2, 3)

In [9]:
a_3.shape

(2, 3, 3)

In [10]:
import pandas as pd

In [11]:
df = pd.DataFrame(a_2)
df

Unnamed: 0,0,1,2
0,1.0,2.5,3.0
1,4.0,5.7,6.9


In [12]:
type(a_1), type(a_2), type(a_3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [13]:
a_1.ndim, a_2.ndim, a_3.ndim

(1, 2, 3)

In [14]:
a_1.dtype, a_2.dtype, a_3.dtype

(dtype('int32'), dtype('float64'), dtype('int32'))

In [15]:
a_1.size, a_2.size, a_3.size

(3, 6, 18)

In [16]:
## len() returns only the size of the first axis (shape[0]), not the total number of elements, no matter how many dimensions the array has
len(a_1), len(a_2), len(a_3)

(3, 2, 2)

## Creating Arrays

In [17]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [18]:
type(sample_array)

numpy.ndarray

In [19]:
sample_array.dtype

dtype('int32')

In [20]:
ones = np.ones((2, 3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [21]:
ones.dtype

dtype('float64')

In [22]:
zeros = np.zeros((2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [23]:
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [24]:
random_1 = np.random.randint(0, 10, size=(3, 5))
random_1

array([[6, 8, 8, 5, 6],
       [9, 6, 5, 2, 6],
       [3, 4, 8, 8, 7]])

In [25]:
np.random.seed(0)
random_2 = np.random.random((5,3))
random_2

array([[0.5488135 , 0.71518937, 0.60276338],
       [0.54488318, 0.4236548 , 0.64589411],
       [0.43758721, 0.891773  , 0.96366276],
       [0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606]])

## Viewing arrays and matrices

In [26]:
#Finding unique elements of an array
np.unique(random_1)

array([2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
random_1

array([[6, 8, 8, 5, 6],
       [9, 6, 5, 2, 6],
       [3, 4, 8, 8, 7]])

In [28]:
random_1[0]

array([6, 8, 8, 5, 6])

In [29]:
random_1.shape

(3, 5)

In [30]:
a_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [31]:
a_3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

In [32]:
# Calling seed() with no argument re-seeds NumPy's global generator from fresh,
# unpredictable entropy, which undoes the earlier np.random.seed(0); the values
# shown below will therefore differ on every run.
np.random.seed()
a_4 = np.random.randint(10, size = (2, 3, 4, 5))
a_4

array([[[[2, 6, 3, 4, 2],
         [5, 3, 7, 8, 8],
         [3, 9, 0, 0, 1],
         [9, 4, 9, 4, 7]],

        [[7, 1, 6, 9, 7],
         [9, 2, 1, 6, 7],
         [7, 6, 8, 5, 6],
         [3, 7, 0, 8, 2]],

        [[1, 9, 6, 9, 0],
         [4, 7, 9, 2, 4],
         [1, 1, 9, 6, 3],
         [6, 7, 4, 9, 7]]],


       [[[1, 0, 5, 8, 9],
         [5, 6, 6, 2, 4],
         [5, 2, 9, 0, 4],
         [7, 7, 6, 7, 6]],

        [[1, 3, 0, 5, 2],
         [7, 0, 8, 5, 5],
         [3, 3, 8, 2, 1],
         [2, 2, 5, 2, 9]],

        [[7, 0, 0, 0, 4],
         [6, 1, 6, 2, 6],
         [8, 4, 5, 9, 0],
         [1, 5, 8, 0, 2]]]])

In [33]:
a_4[:, :, :, :2]

array([[[[2, 6],
         [5, 3],
         [3, 9],
         [9, 4]],

        [[7, 1],
         [9, 2],
         [7, 6],
         [3, 7]],

        [[1, 9],
         [4, 7],
         [1, 1],
         [6, 7]]],


       [[[1, 0],
         [5, 6],
         [5, 2],
         [7, 7]],

        [[1, 3],
         [7, 0],
         [3, 3],
         [2, 2]],

        [[7, 0],
         [6, 1],
         [8, 4],
         [1, 5]]]])

In [34]:
a_4[:, :, :, -2:]

array([[[[4, 2],
         [8, 8],
         [0, 1],
         [4, 7]],

        [[9, 7],
         [6, 7],
         [5, 6],
         [8, 2]],

        [[9, 0],
         [2, 4],
         [6, 3],
         [9, 7]]],


       [[[8, 9],
         [2, 4],
         [0, 4],
         [7, 6]],

        [[5, 2],
         [5, 5],
         [2, 1],
         [2, 9]],

        [[0, 4],
         [2, 6],
         [9, 0],
         [0, 2]]]])

In [35]:
a_4.shape

(2, 3, 4, 5)

In [36]:
a_4[0]

array([[[2, 6, 3, 4, 2],
        [5, 3, 7, 8, 8],
        [3, 9, 0, 0, 1],
        [9, 4, 9, 4, 7]],

       [[7, 1, 6, 9, 7],
        [9, 2, 1, 6, 7],
        [7, 6, 8, 5, 6],
        [3, 7, 0, 8, 2]],

       [[1, 9, 6, 9, 0],
        [4, 7, 9, 2, 4],
        [1, 1, 9, 6, 3],
        [6, 7, 4, 9, 7]]])

In [37]:
a_4.ndim

4

## Manipulating Arrays

### Arithmetic

In [38]:
a_1 * a_2

array([[ 1. ,  5. ,  9. ],
       [ 4. , 11.4, 20.7]])

In [39]:
a_2 * a_3

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [93]:
# reshape() returns a reshaped array and does not modify a_2 in place.
# The result of this call is discarded, so a_2 below still has shape (2, 3).
a_2.reshape((3, 2))
a_2

array([[1. , 2.5, 3. ],
       [4. , 5.7, 6.9]])

In [94]:
a_2

array([[1. , 2.5, 3. ],
       [4. , 5.7, 6.9]])

In [95]:
a_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [96]:
a_5 = a_2.reshape((3, 2))

In [97]:
# Fails: broadcasting aligns shapes from the last axis backwards, so (2, 3, 3)
# against (3, 2) compares 3 with 2 in the last axis, which are neither equal nor 1.
a_3 * a_5

ValueError: operands could not be broadcast together with shapes (2,3,3) (3,2) 

In [98]:
a_2 / a_1

array([[1.  , 1.25, 1.  ],
       [4.  , 2.85, 2.3 ]])

In [99]:
a_2 // a_1

array([[1., 1., 1.],
       [4., 2., 2.]])

In [100]:
np.square(a_2) 

array([[ 1.  ,  6.25,  9.  ],
       [16.  , 32.49, 47.61]])

In [101]:
a_2 ** 2

array([[ 1.  ,  6.25,  9.  ],
       [16.  , 32.49, 47.61]])

In [102]:
a_2 % a_1

array([[0. , 0.5, 0. ],
       [0. , 1.7, 0.9]])

In [103]:
a_1

array([1, 2, 3])

In [104]:
a_2 % a_1

array([[0. , 0.5, 0. ],
       [0. , 1.7, 0.9]])

In [105]:
a_1 % 2

array([1, 0, 1], dtype=int32)

In [106]:
np.log(a_2)

array([[0.        , 0.91629073, 1.09861229],
       [1.38629436, 1.74046617, 1.93152141]])

In [107]:
np.exp(a_2)

array([[  2.71828183,  12.18249396,  20.08553692],
       [ 54.59815003, 298.86740097, 992.27471561]])

### Aggregation (performing the same operation on a number of things)

In [108]:
listy_list = [1, 2, 3]
type(listy_list)

list

In [109]:
sum(listy_list)

6

In [110]:
sum(a_1)

6

In [111]:
a_1

array([1, 2, 3])

In [112]:
np.sum(a_1)

6

In [113]:
test_list = np.random.random(100000)

In [114]:
test_list[:100]

array([0.97861834, 0.79915856, 0.46147936, 0.78052918, 0.11827443,
       0.63992102, 0.14335329, 0.94466892, 0.52184832, 0.41466194,
       0.26455561, 0.77423369, 0.45615033, 0.56843395, 0.0187898 ,
       0.6176355 , 0.61209572, 0.616934  , 0.94374808, 0.6818203 ,
       0.3595079 , 0.43703195, 0.6976312 , 0.06022547, 0.66676672,
       0.67063787, 0.21038256, 0.1289263 , 0.31542835, 0.36371077,
       0.57019677, 0.43860151, 0.98837384, 0.10204481, 0.20887676,
       0.16130952, 0.65310833, 0.2532916 , 0.46631077, 0.24442559,
       0.15896958, 0.11037514, 0.65632959, 0.13818295, 0.19658236,
       0.36872517, 0.82099323, 0.09710128, 0.83794491, 0.09609841,
       0.97645947, 0.4686512 , 0.97676109, 0.60484552, 0.73926358,
       0.03918779, 0.28280696, 0.12019656, 0.2961402 , 0.11872772,
       0.31798318, 0.41426299, 0.0641475 , 0.69247212, 0.56660145,
       0.26538949, 0.52324805, 0.09394051, 0.5759465 , 0.9292962 ,
       0.31856895, 0.66741038, 0.13179786, 0.7163272 , 0.28940

In [115]:
%timeit sum(test_list)
%timeit np.sum(test_list)

11 ms ± 184 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
74.2 µs ± 5.52 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [116]:
np.mean(test_list)

0.499475797671074

In [117]:
### Standard deviation is a measure of how spread out a group of numbers is around the mean value.
np.std(test_list)

0.2890406574912442

In [118]:
### Variance is the average of the squared differences between each number and the mean value.
### Higher variance = wider range of numbers (in finance terms: higher risk and potentially higher return).
### Lower variance = narrower range of numbers (in finance terms: lower risk and lower return).
np.var(test_list)

0.08354450168297074

In [119]:
# NOTE(review): this squares the variance, which is not a standard statistic.
# Squaring the standard deviation (np.std) would give back the variance - confirm intent.
np.square(np.var(test_list))

0.006979683761455901

In [120]:
# NOTE(review): this takes the square root of the standard deviation, not of the
# variance, so it does not reproduce np.std; the next cell shows std = sqrt(var).
np.sqrt(np.std(test_list))

0.5376250156858814

In [121]:
# Standard deviation = square root of variance
np.sqrt(np.var(test_list))

0.2890406574912442

## Mean and Standard Deviation

In [90]:
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [91]:
np.var(high_var_array), np.var(low_var_array)

(4296133.472222221, 8.0)

In [92]:
np.std(high_var_array), np.std(low_var_array)

(2072.711623024829, 2.8284271247461903)

In [40]:
# NOTE: the NameError recorded below is a stale output. This cell was executed
# (In [40]) before the cell that defines high_var_array and low_var_array
# (In [90]); re-running the notebook from top to bottom resolves it.
# The two histogram cells that follow show the same stale error.
np.mean(high_var_array), np.mean(low_var_array)

NameError: name 'high_var_array' is not defined

In [41]:
%matplotlib inline
import matplotlib.pyplot as plt

In [42]:
plt.hist(high_var_array)
plt.show()

NameError: name 'high_var_array' is not defined

In [74]:
plt.hist(low_var_array)
plt.show()

NameError: name 'low_var_array' is not defined

## Reshaping and Transposing

In [75]:
a_2 

array([[1. , 2.5, 3. ],
       [4. , 5.7, 6.9]])

In [76]:
a_3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [77]:
a_2.shape, a_3.shape

((2, 3), (2, 3, 3))

In [78]:
a_2 * a_3

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [79]:
## The rules of broadcasting are as follows: shapes are compared from the last dimension backwards, and
## 1. two dimensions are compatible when they are equal, or
## 2. two dimensions are compatible when one of them is 1 (that dimension is stretched to match the other).
a_2_reshaped = a_2.reshape((2, 3, 1))
a_2_reshaped

array([[[1. ],
        [2.5],
        [3. ]],

       [[4. ],
        [5.7],
        [6.9]]])

In [80]:
a_2_reshaped * a_3

array([[[  1. ,   2. ,   3. ],
        [ 10. ,  12.5,  15. ],
        [ 21. ,  24. ,  27. ]],

       [[ 40. ,  44. ,  48. ],
        [ 74.1,  79.8,  85.5],
        [110.4, 117.3, 124.2]]])

In [81]:
## A transpose reverses the order of an array's axes (for a 2-D matrix, rows and columns are swapped).
a_2.T

array([[1. , 4. ],
       [2.5, 5.7],
       [3. , 6.9]])

In [82]:
a_2.shape, a_2.T.shape

((2, 3), (3, 2))

In [83]:
a_3.shape, a_3.T.shape

((2, 3, 3), (3, 3, 2))

## Dot-product and Element-wise product

In [84]:
np.random.seed(0)
random_array_1 = np.random.randint(10, size = (5, 3))
random_array_2 = np.random.randint(10, size = (5, 3))
random_array_1, random_array_2

(array([[5, 0, 3],
        [3, 7, 9],
        [3, 5, 2],
        [4, 7, 6],
        [8, 8, 1]]),
 array([[6, 7, 7],
        [8, 1, 5],
        [9, 8, 9],
        [4, 3, 0],
        [3, 5, 0]]))

In [85]:
# Elementwise product
np.multiply(random_array_1, random_array_2)

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

In [86]:
#Dot-product
np.dot(random_array_1, random_array_2)

ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

In [87]:
## The cell above failed because when performing dot-product, the inner dimensions must be equal.
random_array_1.shape, random_array_2.shape

((5, 3), (5, 3))

In [88]:
## In order to perform the dot product, we take the transpose of random_array_2.
## That makes the inner dimensions equal: (5, 3) dot (3, 5).
## The outer dimensions then give the shape of the resulting array: (5, 5).
mat_1 = np.dot(random_array_1, random_array_2.T)
mat_1

array([[ 51,  55,  72,  20,  15],
       [130,  76, 164,  33,  44],
       [ 67,  39,  85,  27,  34],
       [115,  69, 146,  37,  47],
       [111,  77, 145,  56,  64]])

In [89]:
mat_1.shape

(5, 5)

## Creating a table and manipulating it with numpy and pandas

In [43]:
np.random.seed(0)
product_numbers = np.random.randint(20, size=(5, 3))
product_prices = np.random.randint(20, size=(1, 3))

In [44]:
product_numbers

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [45]:
product_prices

array([[14, 17,  5]])

In [46]:
weekly_sales = pd.DataFrame(product_numbers, 
                            index = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri'], 
                            columns = ['Luffy', 'Naruto', 'Zoro'])
weekly_sales

Unnamed: 0,Luffy,Naruto,Zoro
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thur,4,6,12
Fri,1,6,7


In [47]:
prices = pd.DataFrame(product_prices, index = ['Price'], columns = ['Luffy', 'Naruto', 'Zoro'])
prices

Unnamed: 0,Luffy,Naruto,Zoro
Price,14,17,5


In [48]:
product_prices.shape, product_numbers.shape

((1, 3), (5, 3))

In [49]:
total_sales = np.dot(product_prices, product_numbers.T)
total_sales

array([[423, 128, 539, 218, 151]])

In [50]:
weekly_sales['Total ($)'] = total_sales
weekly_sales

ValueError: Length of values (1) does not match length of index (5)

In [73]:
## The assignment above failed because total_sales has shape (1, 5) - one row of five values - while the DataFrame has five rows; transposing it to (5, 1) gives one value per row.
total_sales.shape

(1, 5)

In [57]:
weekly_sales['Total ($)'] = total_sales.T
weekly_sales

Unnamed: 0,Luffy,Naruto,Zoro,Total ($)
Mon,12,15,0,423
Tue,3,3,7,128
Wed,9,19,18,539
Thur,4,6,12,218
Fri,1,6,7,151


In [58]:
weekly_sales

Unnamed: 0,Luffy,Naruto,Zoro,Total ($)
Mon,12,15,0,423
Tue,3,3,7,128
Wed,9,19,18,539
Thur,4,6,12,218
Fri,1,6,7,151


## Comparing arrays

In [59]:
a_1 > a_2

array([[False, False, False],
       [False, False, False]])

In [60]:
bool_arr = a_1 >= a_2
bool_arr

array([[ True, False,  True],
       [False, False, False]])

In [61]:
a_1 == a_2

array([[ True, False,  True],
       [False, False, False]])

In [62]:
a_1 == a_1

array([ True,  True,  True])

In [63]:
a_1 < a_2

array([[False,  True, False],
       [ True,  True,  True]])

## Sorting arrays in Numpy

In [64]:
a_2

array([[1. , 2.5, 3. ],
       [4. , 5.7, 6.9]])

In [65]:
# NOTE: this rebinds a_4, which earlier in the notebook held a 4-D array of
# shape (2, 3, 4, 5); from here on a_4 is a 2 x 3 array.
a_4 = np.random.randint(10, size = (2, 3))
a_4

array([[9, 8, 9],
       [4, 3, 0]])

In [66]:
np.sort(a_4)

array([[8, 9, 9],
       [0, 3, 4]])

In [67]:
np.argsort(a_4)

array([[1, 0, 2],
       [2, 1, 0]], dtype=int64)

In [68]:
np.argmax(a_4)

0

In [69]:
np.argmax(a_4, axis = 0)

array([0, 0, 0], dtype=int64)

In [70]:
np.argmax(a_4, axis = 1)

array([0, 0], dtype=int64)

In [71]:
np.argmin(a_4, axis = 0)

array([1, 1, 1], dtype=int64)

In [72]:
np.argmin(a_4, axis = 1)

array([1, 2], dtype=int64)

## Converting images to arrays

<img src="panda.png" />

In [125]:
## import an image reader
from matplotlib.image import imread

In [127]:
## This reads the image file into a NumPy array whose elements are the pixel values of the image
panda = imread('panda.png')
panda

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       ...,

       [[0.13333334, 0.07450981, 0.05490196],
        [0.12156863, 0.0627451 , 0.04313726],
        [0.10980392, 0

In [129]:
panda.dtype

dtype('float32')

In [130]:
panda.ndim

3

In [131]:
print(type(panda))

<class 'numpy.ndarray'>


In [132]:
type(panda)

numpy.ndarray

<img src='car-photo.png' />

In [134]:
car = imread('car-photo.png')
car

array([[[0.5019608 , 0.50980395, 0.4862745 , 1.        ],
        [0.3372549 , 0.34509805, 0.30588236, 1.        ],
        [0.20392157, 0.21568628, 0.14901961, 1.        ],
        ...,
        [0.64705884, 0.7058824 , 0.54901963, 1.        ],
        [0.59607846, 0.63529414, 0.45882353, 1.        ],
        [0.44705883, 0.47058824, 0.3372549 , 1.        ]],

       [[0.44313726, 0.43529412, 0.40392157, 1.        ],
        [0.3137255 , 0.31764707, 0.27450982, 1.        ],
        [0.2       , 0.21176471, 0.14117648, 1.        ],
        ...,
        [0.5058824 , 0.5372549 , 0.4117647 , 1.        ],
        [0.49803922, 0.52156866, 0.39607844, 1.        ],
        [0.4       , 0.42745098, 0.34117648, 1.        ]],

       [[0.39607844, 0.38039216, 0.34117648, 1.        ],
        [0.31764707, 0.3137255 , 0.27450982, 1.        ],
        [0.28627452, 0.29411766, 0.24705882, 1.        ],
        ...,
        [0.44705883, 0.45882353, 0.32156864, 1.        ],
        [0.45882353, 0.482352

<img src='dog-photo.png' /> 

In [136]:
dog = imread('dog-photo.png')
dog

array([[[0.70980394, 0.80784315, 0.88235295, 1.        ],
        [0.72156864, 0.8117647 , 0.8862745 , 1.        ],
        [0.7411765 , 0.8156863 , 0.8862745 , 1.        ],
        ...,
        [0.49803922, 0.6862745 , 0.8392157 , 1.        ],
        [0.49411765, 0.68235296, 0.8392157 , 1.        ],
        [0.49411765, 0.68235296, 0.8352941 , 1.        ]],

       [[0.69411767, 0.8039216 , 0.8862745 , 1.        ],
        [0.7019608 , 0.8039216 , 0.88235295, 1.        ],
        [0.7058824 , 0.80784315, 0.88235295, 1.        ],
        ...,
        [0.5019608 , 0.6862745 , 0.84705883, 1.        ],
        [0.49411765, 0.68235296, 0.84313726, 1.        ],
        [0.49411765, 0.68235296, 0.8392157 , 1.        ]],

       [[0.6901961 , 0.8       , 0.88235295, 1.        ],
        [0.69803923, 0.8039216 , 0.88235295, 1.        ],
        [0.7058824 , 0.80784315, 0.88235295, 1.        ],
        ...,
        [0.5019608 , 0.6862745 , 0.84705883, 1.        ],
        [0.49803922, 0.686274