## Numpy

In [2]:
import numpy as np

In [3]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [4]:
[1, True, 2.5, 4]

[1, True, 2.5, 4]

In [5]:
np.array([1, True, 2.5, 4])

array([1. , 1. , 2.5, 4. ])

In [6]:
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [7]:
a1 = np.array([1, 2, 3, 4])

In [8]:
type(a1)

numpy.ndarray

In [9]:
a2 = np.array([[1, 2, 3], [4, 5, 6]])

In [10]:
type(a2)

numpy.ndarray

In [11]:
a2.shape

(2, 3)

In [12]:
a2.ndim

2

In [13]:
a2.dtype

dtype('int64')

In [14]:
a2.size

6

## Creating Numpy Arrays from Scratch
### zeros, ones, full, arange, linspace 

In [15]:
np.zeros([2, 4], dtype = int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [16]:
np.ones([3, 5], dtype = float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [17]:
# arange
# create an array filled with a linear sequence
# starting at 0, stopping before 20 (the stop value is excluded), stepping by 2
# this is similar to the built-in range() function
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [18]:
np.full([4, 6], True)

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [19]:
# create an array of five values evenly spaced between 0 and 1 
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### random

In [20]:
np.random.random((4, 4))

array([[0.80629512, 0.37289637, 0.28697448, 0.4827067 ],
       [0.66522147, 0.42215591, 0.85603222, 0.32244323],
       [0.0262339 , 0.0958103 , 0.64615339, 0.79240182],
       [0.41528224, 0.54738042, 0.83824231, 0.6976804 ]])

In [21]:
# seed the global NumPy random generator for reproducibility:
# the draws below (and later np.random calls, until the next np.random.seed)
# continue from this fixed state, so they repeat on every run
np.random.seed(0)
np.random.random((4, 4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [22]:
np.random.normal(0, 1, (3, 3))

array([[ 0.44386323,  0.33367433,  1.49407907],
       [-0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ]])

In [23]:
np.random.randint(0, 10, (4, 5))

array([[7, 2, 0, 0, 4],
       [5, 5, 6, 8, 4],
       [1, 4, 9, 8, 1],
       [1, 7, 9, 9, 3]])

In [24]:
np.random.rand(4, 5)

array([[0.65314004, 0.17090959, 0.35815217, 0.75068614, 0.60783067],
       [0.32504723, 0.03842543, 0.63427406, 0.95894927, 0.65279032],
       [0.63505887, 0.99529957, 0.58185033, 0.41436859, 0.4746975 ],
       [0.6235101 , 0.33800761, 0.67475232, 0.31720174, 0.77834548]])

## Array indexing and slicing

In [25]:
x1 = np.random.randint(20, size = 6)

In [26]:
x1

array([ 3,  0,  5,  0, 17, 18])

In [27]:
x1[4], x1[0], x1[3]

(17, 3, 0)

In [28]:
x1[-1]

18

## Multi-dimensional arrays

In [29]:
x2 = np.random.randint(10, size = (3, 4))

In [30]:
x2

array([[4, 2, 0, 3],
       [2, 0, 7, 5],
       [9, 0, 2, 7]])

In [31]:
x2[1, 2]

7

## slicing
#### x[start:stop:step]

In [32]:
x1

array([ 3,  0,  5,  0, 17, 18])

In [33]:
x1[0:3]

array([3, 0, 5])

In [34]:
x1[::2]

array([ 3,  5, 17])

In [35]:
x2

array([[4, 2, 0, 3],
       [2, 0, 7, 5],
       [9, 0, 2, 7]])

In [36]:
x2[:2, :3]

array([[4, 2, 0],
       [2, 0, 7]])

In [37]:
x2[:, :2]

array([[4, 2],
       [2, 0],
       [9, 0]])

## reshaping of arrays and transpose

In [38]:
# the integers 1..9 as a flat (one-dimensional) array; echo its shape
grid = np.arange(1, 10, 1)
np.shape(grid)

(9,)

In [39]:
grid.reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
x = np.array([1, 2, 3])

In [41]:
x.shape

(3,)

In [42]:
x.reshape(1, 3).shape

(1, 3)

In [43]:
x = np.array([[1, 2],[3, 4]])

In [44]:
x

array([[1, 2],
       [3, 4]])

In [45]:
x.T

array([[1, 3],
       [2, 4]])

## Array Concatenation and Splitting

In [46]:
# two short 1-D arrays to be joined end to end
x, y = np.array([1, 2, 3]), np.array([3, 2, 1])

In [47]:
np.concatenate((x, y)) # axis = 0 by default

array([1, 2, 3, 3, 2, 1])

In [48]:
# 2x3 integer grid, written one row per line
grid = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [49]:
np.concatenate((grid, grid), axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [50]:
# vstack: x becomes the first row, followed by the rows of grid
grid = np.array([
    [9, 8, 7],
    [6, 5, 4],
])
x = np.array([1, 2, 3])
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [51]:
# hstack: a (2, 1) column of 99s is placed to the left of grid's columns
y = np.full((2, 1), 99)
np.hstack([y, grid])

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

## splitting of arrays

In [52]:
x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 33, 12, 99, 34])

In [53]:
# split at indices 3 and 5 -> x[:3], x[3:5], x[5:]
# NOTE: this rebinds x1 and x2, which the indexing/slicing cells above used for other arrays
x1, x2, x3 = np.split(x, [3, 5])

In [54]:
x1

array([1, 2, 3])

In [55]:
x2

array([4, 5])

In [56]:
x3

array([ 6,  7,  8,  9, 33, 12, 99, 34])

## Broadcasting and Vectorized operation

In [57]:
a = np.arange(3)

In [58]:
a

array([0, 1, 2])

In [59]:
a + 5 # broadcasting

array([5, 6, 7])

In [60]:
b = np.ones((3, 3))

In [61]:
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [62]:
# a cell echoes only its last expression, so return both shapes as one tuple
a.shape, b.shape

((3,), (3, 3))

In [63]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [64]:
a * b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [65]:
c = np.arange(3).reshape((3, 1))

In [66]:
c

array([[0],
       [1],
       [2]])

In [67]:
a + c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Manipulation & Comparing Arrays
### Aggregation & comparing Arrays
#### Aggregation = reducing many values to a single summary value (e.g. sum, mean, min, max, standard deviation)

In [68]:
list_number = [1, 2, 3]

In [69]:
ll = np.array(list_number)

In [70]:
ll

array([1, 2, 3])

In [71]:
sum(ll) # python sum

6

In [72]:
np.sum(ll) # numpy sum

6

In [73]:
# Create a massive NumPy array: 10,000 uniform samples drawn from [0, 1)
massive_array = np.random.random(10000)
# only the cell's last expression is echoed, so the shape is the sole output here
massive_array.shape

(10000,)

In [74]:
%timeit sum(massive_array) # Python built-in function sum()
%timeit np.sum(massive_array) # Numpy's function np.sum()

854 µs ± 3.64 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
10.1 µs ± 382 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [75]:
np.mean(massive_array)

0.5013357616718551

In [76]:
np.max(massive_array)

0.9999709462497284

In [77]:
np.min(massive_array)

2.0027530115096503e-05

In [78]:
# dog heights built directly as an array; np.std's default is the population standard deviation
dogs_height = np.array([600, 470, 170, 430, 300])
np.std(dogs_height)

147.32277488562318

In [79]:
np.var(dogs_height)

21704.0

In [80]:
np.sqrt(np.var(dogs_height))

147.32277488562318

### Sorting Array
`np.sort` returns a sorted copy of the array and uses a quicksort algorithm by default

In [84]:
x = np.array([2,1,4,3,5,7,9,1,2])
np.sort(x)

array([1, 1, 2, 2, 3, 4, 5, 7, 9])

In [85]:
# argsort returns the indices that would sort the array,
# i.e. x[np.argsort(x)] gives the same values as np.sort(x)
np.argsort(x)

array([1, 7, 0, 8, 3, 2, 4, 5, 6])

## sorting along rows and columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument

In [88]:
# fix the seed so the 4x6 matrix of digits 0-9 is the same on every run
np.random.seed(42)
MatA = np.random.randint(low=0, high=10, size=(4, 6))

In [89]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [90]:
np.sort(MatA, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [91]:
np.sort(MatA, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Linear Algebra

In [92]:
# 3x3 left-hand operand for the matrix products below
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [93]:
# 3x2 right-hand operand for the matrix products below
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1],
])

In [94]:
# A (3x3) dot product B (3x2) -> result is 3x2
# the inner dimensions must match: columns of A (3) == rows of B (3)
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [95]:
A@B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [96]:
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [98]:
B.T.dot(A)

array([[36, 48, 60],
       [24, 33, 42]])

### Dot Product Example

In [99]:
# number of jars sold: a seeded 5x3 table of integers in [0, 20)
np.random.seed(0)
sales_amount = np.random.randint(0, 20, size=(5, 3))

In [100]:
sales_amount

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [101]:
# create weekly_sales DataFrame: one row per weekday, one column per product
import pandas as pd
weekly_sales = pd.DataFrame(
    sales_amount,
    index=['Mon', 'Tues', 'Wed', 'Thu', 'Fri'],
    columns=['Almond Butter', 'Peanut Butter', 'Cashew Butter'],
)

In [102]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thu,4,6,12
Fri,1,6,7


In [112]:
# create a price array (one price per butter, in the same order as the sales columns)
prices = np.array([10,8,12]) # alternatively np.array([[10, 8, 12]]) is already shape (1, 3), so no reshape would be needed
prices.shape

(3,)

In [114]:
# one-row price table that uses the same product column labels as the sales data
butter_prices = pd.DataFrame(
    prices.reshape(1, 3),
    index=['Price'],
    columns=['Almond Butter', 'Peanut Butter', 'Cashew Butter'],
)

In [105]:
butter_prices 

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12
