# Import Numpy

In [1]:
import numpy as np
import pandas as pd

# Create Numpy Arrays from Python Lists

In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [3]:
[3.14, 4, 3]

[3.14, 4, 3]

In [4]:
np.array([3.14, 4, 3])

array([3.14, 4.  , 3.  ])

In [5]:
np.array([1, 2, 3, 4], dtype = 'float32')

array([1., 2., 3., 4.], dtype=float32)

In [6]:
a1 = np.array([1, 2, 3, 4])

In [7]:
type(a1)

numpy.ndarray

In [8]:
a2 = np.array([[1, 2, 3], 
              [4, 5, 6]])

In [9]:
type(a2)

numpy.ndarray

In [10]:
a2.shape

(2, 3)

In [11]:
a2.ndim

2

In [12]:
a2.dtype

dtype('int32')

In [13]:
a2.size

6

# Create Numpy Arrays from Scratch

### `zeros`, `ones`, `full`, `arange`, `linspace`

In [14]:
np.zeros([2, 4])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [15]:
np.zeros([2, 4]).dtype

dtype('float64')

In [16]:
np.zeros([2, 4], dtype = int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [17]:
np.ones((3, 5), dtype = float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [18]:
# Create an array filled with a linear sequence
# Starting at 0, stopping before 20 (the end value itself is excluded), stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [19]:
np.full((3, 5), 6.9)

array([[6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9]])

In [20]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### `random`

In [21]:
np.random.random((4, 4))

array([[0.00645162, 0.95173629, 0.9872604 , 0.01263854],
       [0.04783158, 0.2134431 , 0.03070948, 0.84922829],
       [0.71428104, 0.56601119, 0.54079651, 0.96731983],
       [0.17440366, 0.0029429 , 0.68206886, 0.28028879]])

In [22]:
np.random.random((4, 4))

array([[0.81644417, 0.34017932, 0.41471007, 0.35079028],
       [0.26638915, 0.69523367, 0.87179104, 0.07232023],
       [0.83376942, 0.0510018 , 0.45480889, 0.30682616],
       [0.33549399, 0.86039889, 0.30773216, 0.59886265]])

In [31]:
# Seed for reproducibility
np.random.seed(0)
np.random.random((4, 4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [32]:
np.random.normal(0, 1, (3, 3))

array([[ 0.44386323,  0.33367433,  1.49407907],
       [-0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ]])

In [33]:
np.random.randint(0, 5, (4,5))

array([[4, 2, 0, 0, 4],
       [0, 4, 1, 4, 1],
       [2, 2, 0, 1, 1],
       [1, 1, 3, 3, 2]])

In [34]:
np.random.rand(4, 4)

array([[0.1289263 , 0.31542835, 0.36371077, 0.57019677],
       [0.43860151, 0.98837384, 0.10204481, 0.20887676],
       [0.16130952, 0.65310833, 0.2532916 , 0.46631077],
       [0.24442559, 0.15896958, 0.11037514, 0.65632959]])

### Array Indexing & Slicing

#### One-dimensional subarray

In [35]:
x1 = np.random.randint(20, size = 6)

In [36]:
x1

array([17,  5,  9,  3,  0,  5])

In [37]:
x1[4]

0

In [38]:
x1[4], x1[0]

(0, 17)

In [39]:
x1[4], x1[0], x1[-1]

(0, 17, 5)

#### Multi-dimensional array

In [40]:
x2 = np.random.randint(10, size = (3, 4))

In [41]:
x2

array([[0, 1, 2, 4],
       [2, 0, 3, 2],
       [0, 7, 5, 9]])

In [42]:
x2[1,2]

3

In [43]:
x2[1,2] = 6

In [44]:
x2

array([[0, 1, 2, 4],
       [2, 0, 6, 2],
       [0, 7, 5, 9]])

#### Slicing:
`x[start:stop:step]`

In [45]:
x1

array([17,  5,  9,  3,  0,  5])

In [46]:
x1[0:3]

array([17,  5,  9])

In [47]:
x1[2:4]

array([9, 3])

In [48]:
# Every other element: slice the whole array with a step of 2
x1[::2]

array([17,  9,  0])

In [49]:
x2

array([[0, 1, 2, 4],
       [2, 0, 6, 2],
       [0, 7, 5, 9]])

In [51]:
# First two rows, first three columns
x2[:2,:3]

array([[0, 1, 2],
       [2, 0, 6]])

In [52]:
x2[:, :2]

array([[0, 1],
       [2, 0],
       [0, 7]])

## Reshaping of Arrays & Transpose

In [55]:
grid = np.arange(1, 10)
grid.shape

(9,)

In [56]:
grid.reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
x = np.array([1, 2, 3])

In [58]:
x.shape

(3,)

In [59]:
x.reshape((1,3)).shape

(1, 3)

In [60]:
x = np.array([[1. , 2.], [3. ,4.]])

In [61]:
x

array([[1., 2.],
       [3., 4.]])

In [62]:
x.T

array([[1., 3.],
       [2., 4.]])

## Array Concatenation and Splitting

### Concatenation

In [65]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [66]:
np.concatenate((x,y))

array([1, 2, 3, 3, 2, 1])

In [70]:
grid = np.array([[1, 2, 3],
                [4, 5, 6]])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [73]:
np.concatenate((grid, grid)) # axis = 0 by default

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [74]:
np.concatenate((grid, grid), axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [78]:
# Inputs for vertical stacking with np.vstack: a 2 x 3 grid and a 1-D row of length 3
grid = np.array([[9, 8, 7], [6, 5, 4]])
x = np.array([1, 2, 3])

In [76]:
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [77]:
# Stack arrays side by side (column-wise) with hstack:
# a 2 x 1 column of 99s is placed to the left of grid
y = np.array([[99], [99]])
np.hstack((y, grid))

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

### Splitting of arrays

In [79]:
x = np.array([1, 2, 3, 99, 69, 3, 2, 1])

In [80]:
np.split(x, [3, 5])

[array([1, 2, 3]), array([99, 69]), array([3, 2, 1])]

In [83]:
x1, x2, x3 = np.split(x, [3, 5])
x1

array([1, 2, 3])

In [84]:
x2

array([99, 69])

In [85]:
x3

array([3, 2, 1])

## Broadcasting and Vectorized operations

### Broadcasting

In [86]:
a = np.arange(3)

In [87]:
a

array([0, 1, 2])

In [88]:
a + 5

array([5, 6, 7])

In [90]:
b = np.ones((3,3))

In [91]:
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [92]:
a.shape, b.shape

((3,), (3, 3))

In [93]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [94]:
a*b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [95]:
c = np.arange(3).reshape((3,1))

In [96]:
c

array([[0],
       [1],
       [2]])

In [97]:
c + a

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Manipulating & Comparing Arrays

### Aggregation
Aggregation = performing the same operation on a number of things (e.g. summing every element of an array)

In [98]:
list_number = [1, 2, 3]

In [99]:
ll = np.array(list_number)

In [100]:
ll

array([1, 2, 3])

In [102]:
sum(ll) # python sum()

6

In [103]:
np.sum(ll) # numpy sum()

6

In [106]:
# Create a massive NumPy array of 10,000 random floats drawn from [0, 1)
massive_array = np.random.random(10000)
# Only the last expression of a cell is displayed, so show the shape here;
# inspect a preview such as massive_array[:5] in its own cell if needed.
massive_array.shape

(10000,)

In [110]:
%timeit sum(massive_array) # python built-in function sum()
%timeit np.sum(massive_array) # numpy's np.sum()

917 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.2 µs ± 2.05 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [111]:
np.mean(massive_array)

0.5052960250326816

In [112]:
np.max(massive_array)

0.999685002639177

In [113]:
np.min(massive_array)

0.0002078643980524264

## Standard Deviation and Variance

### Standard Deviation
(độ lệch chuẩn)

In [None]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
# Standard deviation = square root of Variance

### Variance
(phương sai)

In [114]:
# Variance = the average of the squared differences between each number and the mean
# Higher variance = wider range of numbers
# Lower variance = narrower range of numbers

In [115]:
# Five sample height values, built directly as a NumPy array
dog_height = np.array([600, 470, 170, 430, 300])

# Standard deviation of the sample (square root of the variance)
np.std(dog_height)

147.32277488562318

In [116]:
np.var(dog_height)

21704.0

In [117]:
np.sqrt(np.var(dog_height))

147.32277488562318

## Sorting Arrays
By default, `np.sort` uses a quicksort algorithm

In [118]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [119]:
# A related function is argsort, which instead returns the indices of the sorted elements:
np.argsort(x)

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns
A useful feature of NumPy's sorting functions is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument

In [120]:
np.random.seed(42)
MatA = np.random.randint(0, 10, size = (4,6))

In [121]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [122]:
np.sort(MatA, axis = 0) # axis = 0 is column sort

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [123]:
np.sort(MatA, axis = 1) # axis = 1 is row sort

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Linear Algebra
(Đại số tuyến tính)

In [124]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [125]:
B = np.array([[6, 5],
              [4, 3],
              [2, 1]])

In [129]:
# Dot product (matrix product) of A (3x3) and B (3x2) -> the result has shape (3x2)

In [127]:
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [128]:
# Manual check of the top-left entry of A.dot(B):
# first row of A (1, 2, 3) times first column of B (6, 4, 2)
1*6 + 2*4 + 3*2

20

In [130]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [131]:
# B (3x2) cannot be multiplied by A (3x3) directly because the inner dimensions differ (2 vs 3);
# transpose B to (2x3) first so that B.T @ A is defined
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [132]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

### Dot Product Example

In [133]:
# Number of jars sold: a 5 x 3 table of random integers in [0, 20)
np.random.seed(0)  # fixed seed so the same numbers are drawn on every run
sales_amounts = np.random.randint(0, 20, size=(5, 3))

In [135]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [136]:
# Create weekly_sales DataFrame: one row per weekday, one column per butter type.
# pandas (pd) is imported in the notebook's top import cell rather than here,
# so every dependency is visible in one place.
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
    columns=["Almond Butter", "Peanut Butter", "Cashew Butter"],
)

In [137]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [139]:
# create a price array
prices = np.array([10, 8, 12])

In [142]:
prices.shape

(3,)

In [140]:
# Wrap the prices in a one-row DataFrame labelled "Price",
# using the same column names as weekly_sales
butter_prices = pd.DataFrame(
    prices.reshape(1, 3),
    index=["Price"],
    columns=["Almond Butter", "Peanut Butter", "Cashew Butter"],
)

In [141]:
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [144]:
weekly_sales.shape, butter_prices.T.shape

((5, 3), (3, 1))

In [146]:
# Total price per day: (5, 3) sales dot (3, 1) transposed prices -> (5, 1) totals.
# NOTE(review): DataFrame.dot matches weekly_sales' columns against the index of
# butter_prices.T, so this presumably has to run before any extra column is added
# to weekly_sales — confirm when re-running cells out of order.
total_prices = weekly_sales.dot(butter_prices.T)

In [147]:
total_prices

Unnamed: 0,Price
Mon,240
Tues,138
Wed,458
Thurs,232
Fri,142


In [148]:
# Attach the per-day totals as a new "Total Price" column (modifies weekly_sales in place)
weekly_sales["Total Price"] = total_prices

In [149]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total Price
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142
