## Python for Data Science - Numpy
<b><i>By Patrick Jose Roxas</i></b>

In [1]:
import numpy as np
import sys

<b><i> 1-dimensional array

In [2]:
a = np.array([1,2,3])

print(a)

[1 2 3]


<b><i> 2-dimensional

In [3]:
b = np.array([[9.0, 8.0, 7.0],
            [6.0, 5.0, 4.0]])

print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


<b><i> A. Basic NumPy Methods

In [4]:
## Get dimensions
print('array a: {}'.format(a.ndim))
print('array b: {}'.format(b.ndim))

array a: 1
array b: 2


In [5]:
## Get shape
print('array a: {}'.format(a.shape))
print('array b: {}'.format(b.shape))

array a: (3,)
array b: (2, 3)


In [6]:
# Get type
print('array a: {}'.format(a.dtype))
print('array b: {}'.format(b.dtype))

array a: int32
array b: float64


In [7]:
# Get size
print('array a: {}'.format(a.itemsize))
print('array b: {}'.format(b.itemsize))

array a: 4
array b: 8


In [8]:
# Get total size
print('array a: {} '.format(a.nbytes))
print('array b: {} '.format(b.nbytes))

array a: 12 
array b: 48 


In [9]:
# Flags

print('array a: {}'.format(a.flags))
print('array b: {}'.format(b.flags))

array a:   C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

array b:   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False



In [10]:
## Get Strides

print('array a: {}'.format(a.strides))
print('array b: {}'.format(b.strides))

array a: (4,)
array b: (24, 8)


<b><i> B. Accessing or Changing specific elements, rows, columns etc

In [11]:
a = np.array([[1,2,3,4,5,6,7], [8,9,10,11,12,13,14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [12]:
# Get specific element [r,c]

a[1,5]

13

In [13]:
# Get a specific row

a[0,:]

array([1, 2, 3, 4, 5, 6, 7])

In [14]:
# Get specific column
a[:,2]

array([ 3, 10])

<b><i>C. Numpy Slicing</i></b>

    [start_index : end_index: step_size]

In [15]:
## 1-dim
array_a = np.array([1,2,3,4,5,6,7,8,9,10])
print(array_a)

[ 1  2  3  4  5  6  7  8  9 10]


In [16]:
array_a[1:11:2]

array([ 2,  4,  6,  8, 10])

In [17]:
# 2-dim
array_b = np. array([[1,2,3,4,5,6,7,8,9,10],
             [11,12,13,14,15,16,17,18,19,20]])
print(array_b)

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]]


In [18]:
print(array_b[0,1:10:2])
print(array_b[1,0:9:2])

[ 2  4  6  8 10]
[11 13 15 17 19]


In [19]:
# Appending

array_b[1,5] = 20
print(array_b)

array_b[:,2] = [1,2]
print(array_b)

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 20 17 18 19 20]]
[[ 1  2  1  4  5  6  7  8  9 10]
 [11 12  2 14 15 20 17 18 19 20]]


<b><i> 3-d example
    
    dimension [L * h * w ]

In [20]:
array_c  = np.array([[[1,2],
               [3,4]],
              [[5,6],
               [7,8]]])

print(array_c)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [21]:
# Get specific element

print(array_c[1,:,0])
print(array_c[:, 1,1])
print(array_c[1,:,0:1])

[5 7]
[4 8]
[[5]
 [7]]


In [22]:
# replace
array_c[:,1,:] = [[1,3],[3,4]]
print(array_c)

[[[1 2]
  [1 3]]

 [[5 6]
  [3 4]]]


<b><i>D. Initializing different type of arrays

In [23]:
# null matrix 1-dim

np.zeros(5)

array([0., 0., 0., 0., 0.])

In [24]:
# 2-dim
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [25]:
# unit matrix 3-dim
np.ones([4,2,2], dtype='int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]])

In [26]:
# Any other number

np.full((2,2), 99)

array([[99, 99],
       [99, 99]])

In [27]:
# Any other number full like

np.full_like(array_c, 4)

array([[[4, 4],
        [4, 4]],

       [[4, 4],
        [4, 4]]])

<b><i>Generating a uniform distribution between (0,1]

In [28]:
# random decimal numbers using dimension
np.random.rand(4,2)

array([[0.21462968, 0.73068474],
       [0.66467739, 0.42906323],
       [0.89517604, 0.25883771],
       [0.61426754, 0.89830821]])

In [29]:
## random decimal number using shape
## mask = np.random.random_shape(a.shape)
array_e = np.random.random(a.shape)
print(array_e)

[[0.59383751 0.23923218 0.55553666 0.34020986 0.57541665 0.03180949
  0.3778122 ]
 [0.87730103 0.53249228 0.75735759 0.18648618 0.59021279 0.39738396
  0.37909319]]


In [30]:
array_d = array_e.reshape(10,2)
print(array_d)

ValueError: cannot reshape array of size 14 into shape (10,2)

In [31]:
## random integer values
np.random.randint(7, size=(3,3))

array([[2, 4, 1],
       [5, 2, 6],
       [0, 4, 0]])

In [32]:
## identity matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [33]:
# Repeat array

arr = np.array([[1,2,3]])
r1 = np.repeat(arr,3, axis=0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


<b><i>Exercise</i></b>
    Using all the method taught above, generate a matrix that will
    look like this:
    
    [[1, 1, 1, 1, 1]
     [1, 0, 0, 0, 1]
     [1, 0, 9, 0, 1]
     [1, 0, 0, 0, 1]
     [1, 1, 1, 1, 1]]

In [34]:
output = np.ones((5,5))
print(output)
z= np.zeros((3,3))
z[1,1] = 9
print(z)
output[1:4, 1:4] = z
print(output)
# output[]

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


<b><i>Copying and array

In [35]:
array_x = array_a
array_x

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [36]:
array_x[3:5] = [200, 100]
print(array_a)

[  1   2   3 200 100   6   7   8   9  10]


In [37]:
# copying
array_a = np.array([1,2,3,4,5,6,7,8,9,10])
array_x = array_a.copy()
array_x[3:5] = [200, 100]

print(array_a)
print(array_x)

[ 1  2  3  4  5  6  7  8  9 10]
[  1   2   3 200 100   6   7   8   9  10]


## Mathematics
<b><i> A. Broadcasting

In [38]:
new_array = np.random.rand(7,4)
print(new_array)

[[0.64286003 0.74082728 0.35416953 0.18908776]
 [0.90905079 0.28276727 0.46861832 0.11919131]
 [0.76874204 0.38346365 0.10377168 0.33868222]
 [0.10938126 0.96738167 0.55212248 0.11234226]
 [0.23445364 0.15288069 0.10293087 0.86996762]
 [0.91526915 0.46753763 0.01400558 0.51852389]
 [0.56619596 0.24042485 0.93717554 0.92840746]]


In [39]:
new_array + 7

array([[7.64286003, 7.74082728, 7.35416953, 7.18908776],
       [7.90905079, 7.28276727, 7.46861832, 7.11919131],
       [7.76874204, 7.38346365, 7.10377168, 7.33868222],
       [7.10938126, 7.96738167, 7.55212248, 7.11234226],
       [7.23445364, 7.15288069, 7.10293087, 7.86996762],
       [7.91526915, 7.46753763, 7.01400558, 7.51852389],
       [7.56619596, 7.24042485, 7.93717554, 7.92840746]])

In [40]:
new_array + np.array([1,2,3,4])

array([[1.64286003, 2.74082728, 3.35416953, 4.18908776],
       [1.90905079, 2.28276727, 3.46861832, 4.11919131],
       [1.76874204, 2.38346365, 3.10377168, 4.33868222],
       [1.10938126, 2.96738167, 3.55212248, 4.11234226],
       [1.23445364, 2.15288069, 3.10293087, 4.86996762],
       [1.91526915, 2.46753763, 3.01400558, 4.51852389],
       [1.56619596, 2.24042485, 3.93717554, 4.92840746]])

In [41]:
new_array + np.array([1,2,3])

ValueError: operands could not be broadcast together with shapes (7,4) (3,) 

In [42]:
new_array + np.array([1,2,3,4,5,6,7])

ValueError: operands could not be broadcast together with shapes (7,4) (7,) 

In [43]:
new_array+np.array([[1],[2],[3],[4],[5],[6],[7]])

array([[1.64286003, 1.74082728, 1.35416953, 1.18908776],
       [2.90905079, 2.28276727, 2.46861832, 2.11919131],
       [3.76874204, 3.38346365, 3.10377168, 3.33868222],
       [4.10938126, 4.96738167, 4.55212248, 4.11234226],
       [5.23445364, 5.15288069, 5.10293087, 5.86996762],
       [6.91526915, 6.46753763, 6.01400558, 6.51852389],
       [7.56619596, 7.24042485, 7.93717554, 7.92840746]])

<b><i> Element-wise Multiplication

In [44]:
matrix_a = np.array([[1,2],
                    [3,4]])
matrix_b = np.array([[4,3],
                    [0,2]])
matrix_c = np.array([[1,2,3],
                    [4,5,6]])

In [45]:
matrix_a * matrix_b

array([[4, 6],
       [0, 8]])

In [46]:
matrix_a * matrix_c

ValueError: operands could not be broadcast together with shapes (2,2) (2,3) 

<b><i>Linear Algebra

In [47]:
vector_a = np.random.randint(8, size=(3,3))
vector_b = np.random.randint(8, size=(3,3))
print(vector_a)
print(vector_b)

[[1 7 1]
 [3 0 4]
 [4 7 1]]
[[4 7 5]
 [5 2 4]
 [1 1 5]]


In [48]:
# Determinant of a matrix
print("\nDeterminant of A:", np.linalg.det(vector_a))


Determinant of A: 83.99999999999999


In [49]:
# Rank of a matrix
print("Rank of A:", np.linalg.matrix_rank(vector_a))

Rank of A: 3


In [50]:
## Inverse
print("\nInverse of A:\n", np.linalg.inv(vector_a))


Inverse of A:
 [[-0.33333333  0.          0.33333333]
 [ 0.1547619  -0.03571429 -0.01190476]
 [ 0.25        0.25       -0.25      ]]


In [51]:
print("\nMatrix A raised to power 3:\n",
           np.linalg.matrix_power(vector_a, 3))


Matrix A raised to power 3:
 [[188 392 112]
 [194 182 222]
 [266 434 202]]


In [52]:
# dot product
np.dot(vector_a, vector_b)

array([[40, 22, 38],
       [16, 25, 35],
       [52, 43, 53]])

<b><i>Statistics

In [53]:
stats = np.random.randint(10, size=(2,5))
print('----- Statistics -----------')
print('|min:    ', np.min(stats,axis=1))
print('|q1:     ', np.quantile(stats,0.25,axis=1))
print('|q2:     ', np.quantile(stats,0.5,axis=1))
print('|q3:     ', np.quantile(stats,0.75,axis=1))
print('|max:    ', np.max(stats, axis=1))
print('|mean:   ', np.mean(stats, axis=1))
print('|std:    ', np.std(stats, axis=1))
print('|sum:    ', np.sum(stats, axis=1))
print('----------------------------')

----- Statistics -----------
|min:     [1 1]
|q1:      [3. 1.]
|q2:      [4. 1.]
|q3:      [9. 3.]
|max:     [9 7]
|mean:    [5.2 2.6]
|std:     [3.24961536 2.33238076]
|sum:     [26 13]
----------------------------


<b><i> Reorganizing Arrays

In [54]:
before = np.array([[1,2,3,4], [5,6,7,8]])
print('before',before)

after = before.reshape(4,2)
print('after',after)

before [[1 2 3 4]
 [5 6 7 8]]
after [[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [55]:
# Vertical stacking vectors
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])


array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [56]:
# Horizontal Stack

h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

### Miscellaneous
<b><i> Load Data from File

In [57]:
filedata = np.genfromtxt('new.txt', delimiter=',')
filedata = filedata.astype('int32')
filedata

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16]])

<b><i>Boolean Masking and Advanced Indexing

In [58]:
filedata > 50

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False]])

In [59]:
filedata[filedata>10]

array([11, 12, 13, 14, 15, 16, 11, 12, 13, 14, 15, 16, 11, 12, 13, 14, 15,
       16])

In [60]:
# index with a list using numpy
a = np.array([1,2,3,4,5,6,7,8,9])
a[[1,2,8]]

array([2, 3, 9])

In [61]:
np.any(filedata>10, axis=0)

array([False, False, False, False, False, False, False, False, False,
       False,  True,  True,  True,  True,  True,  True])

In [62]:
np.all(filedata > 10, axis=0)

array([False, False, False, False, False, False, False, False, False,
       False,  True,  True,  True,  True,  True,  True])