In [None]:
############################# NumPy Library ######################################################
### A powerful library for working with numerical data.
### Other machine learning packages (e.g. Pandas and scikit-learn) were built on top of NumPy.
### One of NumPy's key facilities is the n-dimensional array object.
### Arrays store data of a single type, which makes math and data calculations easier and faster.
###################################################################################################

In [2]:
import numpy as np # rename library as 'np' just for quick typing

In [3]:
# Turn a plain Python list into a NumPy array
lst = list(range(5))  # [0, 1, 2, 3, 4]
arr = np.array(lst)
arr

array([0, 1, 2, 3, 4])

In [4]:
# Check the object's type: np.array() returned a numpy.ndarray
print(type(arr))

<class 'numpy.ndarray'>


In [7]:
##### Array vs. List ##########
# Arrays are fixed-size, while lists are dynamically sized.
# Arrays are preferred over lists for data manipulation because they allow vectorized operations,
# meaning an operator or function is applied to every item of the array at once, with no explicit loop.

In [8]:
# Let's add two to a list object vs. to an array object.
# A list does not support element-wise arithmetic: `lst + 2` raises TypeError.
# The error is caught and printed so the demonstration does not abort
# "Restart Kernel -> Run All".
try:
    lst + 2
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: can only concatenate list (not "int") to list

In [10]:
arr + 2 # it works :) -- 2 is added to every item and a new array is returned

array([2, 3, 4, 5, 6])

In [11]:
# Create a matrix (2-D array) from a list of lists.
# Note: np.array requires less memory allocation than a list of lists.
lst2 = [
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
]
arr2 = np.array(lst2)
arr2

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [6]:
# Creating a multi-type array.
# Without an explicit dtype, np.array(['a', 'b', 'c', 1]) stores the number 1
# as the string '1'. dtype=object keeps every item as the Python object it is.
# (Other dtypes include int, str, float and bool.)
m_array = np.array(['a', 'b', 'c', 1], dtype=object)
m_array

array(['a', 'b', 'c', 1], dtype=object)

In [15]:
# Convert an array back into a list
arr.tolist()

[0, 1, 2, 3, 4]

In [7]:
#### Array characteristics
lst3 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
]
arr3 = np.array(lst3, dtype=float)  # store every item as a float
arr3

array([[ 1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.],
       [ 9., 10., 11., 12.]])

In [8]:
# Basic attributes of the array
print(arr3.shape)  # (rows, cols)
print(arr3.dtype)  # data type of the items
print(arr3.size)   # total number of items
print(arr3.ndim)   # number of dimensions

(3, 4)
float64
12
2


In [9]:
# Array vs. list slicing: an array accepts one slice per axis inside a single [].
# The same expression on the nested list, lst3[:2, :3], is an error.
arr3[0:2, 0:3]  # first 2 rows and first 3 columns

array([[1., 2., 3.],
       [5., 6., 7.]])

In [10]:
# Create a boolean array marking which items satisfy a condition (here: > 4)
bool_arr3 = np.greater(arr3, 4)
bool_arr3

array([[False, False, False, False],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [11]:
# Then use the boolean array as a mask: only the items where it is True are returned
arr3[bool_arr3]

array([ 5.,  6.,  7.,  8.,  9., 10., 11., 12.])

In [18]:
# Transposing swaps rows and columns; it is not the same as reversing the array
tr_arr3 = arr3.T
tr_arr3

array([[ 1.,  5.,  9.],
       [ 2.,  6., 10.],
       [ 3.,  7., 11.],
       [ 4.,  8., 12.]])

In [19]:
# Lists can only be reversed as a whole; with NumPy arrays you can reverse
# the rows only, the columns only, or the whole array:
#   arr3[::-1, :]   -> reverse the row order only
#   arr3[:, ::-1]   -> reverse the column order only
print(arr3[::-1, ::-1]) # reverse both axes, i.e. the whole array

[[12. 11. 10.  9.]
 [ 8.  7.  6.  5.]
 [ 4.  3.  2.  1.]]


In [21]:
# Quick statistics computed over every item of the array
print('mean', np.mean(arr3))
print('max', np.max(arr3))
print('min', np.min(arr3))

mean 6.5
max 12.0
min 1.0


In [22]:
# Min/max at row or column level: axis=1 reduces along each row,
# axis=0 reduces along each column
print('Min in each row: ', arr3.min(axis=1))
print('Min in each col: ', arr3.min(axis=0))

Min in each row:  [1. 5. 9.]
Min in each col:  [1. 2. 3. 4.]


In [20]:
# Cumulative sum: each item is the running total of arr up to that position
np.cumsum(arr)

array([ 0,  1,  3,  6, 10], dtype=int32)

In [23]:
# Slicing an array does NOT copy it: the slice still refers to the original array's data
arr3_slice = arr3[:2,:2]
arr3_slice

array([[1., 2.],
       [5., 6.]])

In [24]:
# Writing through the slice also changes arr3, because the slice refers to
# the original array in memory rather than to a separate copy.
arr3_slice[0, 0] = 100
arr3

array([[100.,   2.,   3.,   4.],
       [  5.,   6.,   7.,   8.],
       [  9.,  10.,  11.,  12.]])

In [25]:
# Solution: take an explicit copy. Changes to the copy do not affect the
# original array, at the cost of extra memory ;)
arr3_copy = arr3[0:2, 0:2].copy()
arr3_copy

array([[100.,   2.],
       [  5.,   6.]])

In [26]:
# Modify the copy, then look at the original: arr3 is unchanged
arr3_copy[0, 0] = 50
arr3

array([[100.,   2.,   3.,   4.],
       [  5.,   6.,   7.,   8.],
       [  9.,  10.,  11.,  12.]])

In [27]:
#### Re-shaping: changing dimensions
# Current shape of arr3 as (rows, cols)
arr3.shape

(3, 4)

In [28]:
# Lay the same 12 items out as 4 rows x 3 columns; the reshaped array is returned, arr3.shape itself is not changed
arr3.reshape(4,3)

array([[100.,   2.,   3.],
       [  4.,   5.,   6.],
       [  7.,   8.,   9.],
       [ 10.,  11.,  12.]])

In [90]:
### Flattening an array converts it to 1-D
# flatten() always returns an independent copy: modifying it never affects
# the original array, but it costs extra memory.
flat_copy = arr3.flatten()
# ravel() returns a view whenever it can: no extra memory, but writing to the
# result also changes the original array.
flat_view = arr3.ravel()
# Make the copy-vs-view difference explicit instead of relying on the comments
assert not np.shares_memory(flat_copy, arr3)
assert np.shares_memory(flat_view, arr3)
flat_view

array([100.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.])

In [29]:
###### Create an array from a range
# np.arange(start, stop, step): start is included, stop is excluded
print(np.arange(5))          # 0 up to (not including) 5
print(np.arange(3, 10))      # 3 up to (not including) 10
print(np.arange(10, 2, -2))  # counting down from 10 by 2, stopping before 2

[0 1 2 3 4]
[3 4 5 6 7 8 9]
[10  8  6  4]


In [30]:
# Create an array of 10 numbers that starts at 1 and ends at 50.
# linspace works out the step for you: (50 - 1) / (10 - 1) = 5.44...
# dtype=int then drops the fractional part of each value, which is why the
# gaps in the result alternate between 5 and 6.
np.linspace(1, 50, num=10, dtype=int)

array([ 1,  6, 11, 17, 22, 28, 33, 39, 44, 50])

In [31]:
# Generate arrays filled with zeros or ones; the argument is the shape.
# Note: a cell only displays its last expression, so only the ones array
# appears in the output below.
np.zeros((3, 3))
np.ones((2, 2))

array([[1., 1.],
       [1., 1.]])

In [32]:
#### Repetition
# np.repeat duplicates each item in place; np.tile repeats the array as a whole
print('Repeat each number twice:', np.repeat(arr, 2))
print('Repeat each the whole array twice:', np.tile(arr, 2))

Repeat each number twice: [0 0 1 1 2 2 3 3 4 4]
Repeat each the whole array twice: [0 1 2 3 4 0 1 2 3 4]


In [38]:
#### Random arrays
print(np.random.rand(2, 2))                    # random numbers between 0 and 1
print(np.random.randn(2, 2))                   # normal distribution
print(np.random.randint(0, 10, size=(2, 2)))   # random integers from 0 up to (not including) 10
print(np.random.random())                      # a single random float

[[0.49850701 0.22479665]
 [0.19806286 0.76053071]]
[[ 0.26551159  0.10854853]
 [ 0.00429143 -0.17460021]]
[[1 8]
 [4 1]]
0.9177741225129434


In [39]:
# Set the seed so the "random" numbers are reproducible
np.random.seed(10)  # seeds NumPy's global random generator
print(np.random.rand(2, 2))
# Alternatively, create a dedicated generator object with its own seed and call
# its methods directly (rn.rand instead of np.random.rand).
rn = np.random.RandomState(seed=10)
print(rn.rand(2, 2))

[[0.77132064 0.02075195]
 [0.63364823 0.74880388]]
[[0.77132064 0.02075195]
 [0.63364823 0.74880388]]
