# Numpy tutorial

In [1]:
import numpy as np

## Numpy introduction
- Fundamental package for scientific computing in Python
- Provides a multi-dimensional array object that is efficient in both memory usage and computation speed
- Widely used in machine learning and data science

---

# Create numpy array

In [29]:
# Build a 1-D array from a plain Python list of ints.
x = np.array([1, 2, 3])
# Single print call, one item per line: container type, element dtype, contents, shape.
print(type(x), x.dtype, x, x.shape, sep="\n")

<class 'numpy.ndarray'>
int64
[1 2 3]
(3,)


In [30]:
# A nested list gives a 2-D array; mixing ints and floats makes every element a float.
x = np.array([[1, 2.1, 3], [4, 5, 6.2]])
# Report type, dtype, contents and shape, one per line.
for detail in (type(x), x.dtype, x, x.shape):
    print(detail)

<class 'numpy.ndarray'>
float64
[[1.  2.1 3. ]
 [4.  5.  6.2]]
(2, 3)


#### np.zeros

In [17]:
# np.zeros(n) returns a 1-D array of n zeros; the printed "0." shows the elements are floats.
zeros = np.zeros(4)
print(zeros)

[0. 0. 0. 0.]


#### np.ones

In [15]:
# np.ones(n) returns a 1-D array of n ones; the printed "1." shows the elements are floats.
ones = np.ones(3)
print(ones)

[1. 1. 1.]


#### np.zeros_like

In [22]:
# Template sequence: zeros_like copies its length and integer dtype, not its values.
data_list = list(range(1, 6))
zeros_l = np.zeros_like(data_list)
print(zeros_l)

[0 0 0 0 0]


#### np.ones_like

In [16]:
# The template here is a list of strings, so the result keeps a string dtype:
# each element is the one-character string '1', not the number 1.
data_list = ['a','b','c','d']

ones_l = np.ones_like(data_list)
print(ones_l)

['1' '1' '1' '1']


#### np.arange

In [24]:
# np.arange(15) yields the integers 0 through 14; the stop value itself is excluded.
arange = np.arange(15)
print(arange)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


#### np.empty

In [28]:
# np.empty allocates the array without initializing it, so the values displayed are
# arbitrary leftovers from memory and will differ from run to run.
np.empty(10, dtype = int)

array([    140213927201736,            22598784,                   0,
                         0,                   0, 7305742614159112034,
       7004273215998801965, 3256213827723538786, 7162243161939588450,
                 892351545])

#### np.diag

In [4]:
# Given a 1-D sequence, np.diag builds a square matrix with those values on the
# main diagonal and zeros elsewhere (the single 2. makes the whole result float).
np.diag([1,2.,3])

array([[1., 0., 0.],
       [0., 2., 0.],
       [0., 0., 3.]])

In [8]:
# Given a 2-D array, np.diag goes the other way and extracts the main diagonal.
x = np.array([[1., 2, 3], [4, 5, 6], [7, 8, 9]])
# Show the full matrix first, then its diagonal.
print(x, np.diag(x), sep="\n")

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
[1. 5. 9.]


In [88]:
# np.linspace: 10 evenly spaced points from 0 to 20, with both endpoints included.
x = np.linspace(start=0, stop=20, num=10)
x

array([ 0.        ,  2.22222222,  4.44444444,  6.66666667,  8.88888889,
       11.11111111, 13.33333333, 15.55555556, 17.77777778, 20.        ])

---

# Random module

In [13]:
# np.random.rand(2, 3): a 2x3 array of uniform random floats in [0, 1).
# No seed is set in this notebook as shown, so the values change on every run.
x = np.random.rand(2,3)
print(x)

[[0.2143403  0.76654888 0.00112509]
 [0.16763422 0.12615828 0.19065696]]


In [15]:
# np.random.random(5): five uniform random floats in [0, 1), returned as a 1-D array.
x = np.random.random(5)
print(x)

[0.38627261 0.49135848 0.60666573 0.83739782 0.49424954]


In [17]:
# np.random.randint(0, 10, size=5): five random integers from 0 to 9 (upper bound excluded).
x = np.random.randint(0, 10, size = 5)
print(x)

[1 5 6 9 4]


---

# Reshape

In [9]:
# Flat six-element vector; its shape is the one-element tuple (6,).
x = np.array([1,2,3,4,5,6])

print(x.shape)

(6,)


In [10]:
# Reshape the same six elements into several layouts and print each resulting shape.
# A -1 entry asks NumPy to infer that dimension from the element count (6 / 3 = 2).
for target in ((1, 6), (2, 3), (2, 3, 1), (3, -1)):
    print(x.reshape(target).shape)

(1, 6)
(2, 3)
(2, 3, 1)
(3, 2)


---

# Element-wise / Matrix operations

In [21]:
# Two 3x4 integer grids: x counts 0..11 row by row, y is the same grid shifted up by one.
x = np.arange(12).reshape(3, 4)
y = x + 1

print(x, y, sep="\n")

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


## add

In [22]:
# Element-wise addition with the + operator (x and y are both 3x4).
x + y

array([[ 1,  3,  5,  7],
       [ 9, 11, 13, 15],
       [17, 19, 21, 23]])

In [32]:
# Function form of x + y; gives the same element-wise sum.
np.add(x,y)

array([[ 1,  3,  5,  7],
       [ 9, 11, 13, 15],
       [17, 19, 21, 23]])

## subtract

In [23]:
# Element-wise subtraction; every entry is -1 because y was built as x + 1.
x-y

array([[-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1]])

In [33]:
# Function form of x - y; gives the same element-wise difference.
np.subtract(x,y)

array([[-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1]])

## multiply

In [24]:
# The * operator multiplies element by element; it is not a matrix product.
x * y

array([[  0,   2,   6,  12],
       [ 20,  30,  42,  56],
       [ 72,  90, 110, 132]])

In [29]:
# Function form of x * y; gives the same element-wise product.
np.multiply(x, y)

array([[  0,   2,   6,  12],
       [ 20,  30,  42,  56],
       [ 72,  90, 110, 132]])

## divide

In [31]:
# Element-wise true division: the inputs are integers but the result is float.
x / y

array([[0.        , 0.5       , 0.66666667, 0.75      ],
       [0.8       , 0.83333333, 0.85714286, 0.875     ],
       [0.88888889, 0.9       , 0.90909091, 0.91666667]])

In [30]:
# Function form of x / y; gives the same element-wise quotient.
np.divide(x, y)

array([[0.        , 0.5       , 0.66666667, 0.75      ],
       [0.8       , 0.83333333, 0.85714286, 0.875     ],
       [0.88888889, 0.9       , 0.90909091, 0.91666667]])

## dot

In [39]:
# Matrix product with the @ operator. y is transposed to 4x3 so the inner
# dimensions match: (3x4) @ (4x3) gives a 3x3 result.
x @ y.transpose()

array([[ 20,  44,  68],
       [ 60, 148, 236],
       [100, 252, 404]])

In [26]:
# np.dot needs the inner dimensions to agree. x and y are both 3x4, so this call
# raises ValueError. The error is caught and printed so that the demonstration
# does not abort a top-to-bottom run of the notebook.
try:
    np.dot(x, y)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: shapes (3,4) and (3,4) not aligned: 4 (dim 1) != 3 (dim 0)

In [27]:
# Transposing y to 4x3 aligns the inner dimensions, so np.dot returns the 3x3 matrix product.
np.dot(x, y.transpose())

array([[ 20,  44,  68],
       [ 60, 148, 236],
       [100, 252, 404]])

In [37]:
# np.matmul is the function form of the @ operator; same 3x3 result as above.
np.matmul(x,y.transpose())

array([[ 20,  44,  68],
       [ 60, 148, 236],
       [100, 252, 404]])

## outer

In [41]:
# np.outer flattens both inputs first, so two 3x4 arrays (12 elements each) give a
# 12x12 result whose entry [i, j] is the i-th element of x times the j-th element of y.
np.outer(x, y)

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12],
       [  2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24],
       [  3,   6,   9,  12,  15,  18,  21,  24,  27,  30,  33,  36],
       [  4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  44,  48],
       [  5,  10,  15,  20,  25,  30,  35,  40,  45,  50,  55,  60],
       [  6,  12,  18,  24,  30,  36,  42,  48,  54,  60,  66,  72],
       [  7,  14,  21,  28,  35,  42,  49,  56,  63,  70,  77,  84],
       [  8,  16,  24,  32,  40,  48,  56,  64,  72,  80,  88,  96],
       [  9,  18,  27,  36,  45,  54,  63,  72,  81,  90,  99, 108],
       [ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120],
       [ 11,  22,  33,  44,  55,  66,  77,  88,  99, 110, 121, 132]])

---

# Statistics and Math

In [111]:
# 3x4 grid of the integers 0..11, used by the max / min / mean examples below.
x = np.arange(12).reshape(3,4)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

## max

In [43]:
# With no axis given, the maximum is taken over every element of the array.
np.max(x)

11

In [45]:
# axis=0 reduces over the rows, giving one maximum per column (4 values).
np.max(x, axis = 0)

array([ 8,  9, 10, 11])

## min

In [46]:
# With no axis given, the minimum is taken over every element of the array.
np.min(x)

0

In [47]:
# axis=1 reduces over the columns, giving one minimum per row (3 values).
np.min(x, axis = 1)

array([0, 4, 8])

## mean

In [113]:
# Mean of all twelve elements.
np.mean(x)

5.5

In [114]:
# axis=0 averages down each column; the result is float even though x holds integers.
np.mean(x, axis = 0)

array([4., 5., 6., 7.])

## correlation

In [57]:
# 50 standard-normal samples arranged as 5 rows x 10 columns.
x = np.random.randn(50).reshape(5,10)

np.corrcoef(x) # Note: correlation is computed between rows here (each row is treated as a variable, not an observation), hence the 5x5 result

array([[ 1.        ,  0.16709292, -0.43893612,  0.08544951,  0.17898792],
       [ 0.16709292,  1.        , -0.2316344 ,  0.67383842, -0.61834854],
       [-0.43893612, -0.2316344 ,  1.        , -0.22799868,  0.39594975],
       [ 0.08544951,  0.67383842, -0.22799868,  1.        , -0.71846294],
       [ 0.17898792, -0.61834854,  0.39594975, -0.71846294,  1.        ]])

## log

In [58]:
# np.log is the natural logarithm (base e).
np.log(3)

1.0986122886681098

In [59]:
# np.log10 is the base-10 logarithm.
np.log10(3)

0.47712125471966244

## exp

In [63]:
# np.exp accepts a plain list and returns e**v for each element as an array.
np.exp([1,2,3])

array([ 2.71828183,  7.3890561 , 20.08553692])

## cos

In [64]:
# Element-wise cosine; the inputs are interpreted as radians.
np.cos([1,2,3])

array([ 0.54030231, -0.41614684, -0.9899925 ])

## sin

In [65]:
# Element-wise sine; the inputs are interpreted as radians.
np.sin([1,2,3])

array([0.84147098, 0.90929743, 0.14112001])

---

# Manipulation

## slicing

In [82]:
# 3x4 grid of the integers 0..11, used by the slicing examples below.
x = np.arange(12).reshape(3,4)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [69]:
# A single integer index on a 2-D array selects a whole row (here the first one).
x[0]

array([0, 1, 2, 3])

In [74]:
# Row 1, columns 0 and 1. The integer row index drops that axis, so the result is 1-D.
x[1,0:2]

array([4, 5])

In [73]:
# Same elements as x[1, 0:2], but slicing the row axis (1:2) keeps it, so the result stays 2-D.
x[1:2,0:2]

array([[4, 5]])

In [75]:
# All rows, column 1: selects the second column as a 1-D array.
x[:,1]

array([1, 5, 9])

In [83]:
# A step of -1 on the column axis reverses the order of the columns in every row.
x[:,::-1]

array([[ 3,  2,  1,  0],
       [ 7,  6,  5,  4],
       [11, 10,  9,  8]])

## set value

In [84]:
# Fresh 3x4 grid of the integers 0..11; the next cell overwrites part of it.
x = np.arange(12).reshape(3,4)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [85]:
# Assigning a scalar to a slice writes it into every selected element.
# This modifies x in place: the whole first row becomes -100.
x[0:1,:] = -100
x

array([[-100, -100, -100, -100],
       [   4,    5,    6,    7],
       [   8,    9,   10,   11]])

## boolean operation / isin / isnan / equal

In [89]:
# Array containing one NaN (position 1), used by the comparison examples below.
x = np.array([1, np.nan, 2, 3])

In [93]:
# Element-wise comparison; comparing NaN with a number is always False.
# Older NumPy releases also emit a RuntimeWarning ("invalid value encountered")
# for this comparison, so it runs inside np.errstate to keep the cell output clean.
with np.errstate(invalid="ignore"):
    greater_than_one = x > 1
greater_than_one

  """Entry point for launching an IPython kernel.


array([False, False,  True,  True])

In [91]:
# True where the element of x is one of the listed values (1 or 3).
np.isin(x, [1,3])

array([ True, False, False,  True])

In [92]:
# True only at the position holding NaN.
np.isnan(x)

array([False,  True, False, False])

In [96]:
# y holds 0..3, to be compared position by position with x = [1, nan, 2, 3].
y = np.arange(4)
print(y)

# Element-wise equality: only positions 2 and 3 match; the NaN position compares False.
np.equal(x, y)

[0 1 2 3]


array([False, False,  True,  True])

## concatenation

In [99]:
# x: the floats 0..11 laid out as 4 rows x 3 columns.
x = np.arange(12, dtype='float').reshape(4, 3)
# y: standard-normal noise with the same 4x3 shape.
y = np.random.randn(12).reshape(4, 3)

print(x, y, sep="\n")

[[ 0.  1.  2.]
 [ 3.  4.  5.]
 [ 6.  7.  8.]
 [ 9. 10. 11.]]
[[ 0.04148076  0.25320189  0.96329368]
 [-1.17026074  0.68099719  0.21367633]
 [-1.52833829  2.26452054 -0.92049164]
 [-0.31694742  0.70464917  0.81967559]]


### concatenate by row

In [105]:
# axis=0 joins along the row axis: y is stacked underneath x, so the row counts add (4 + 4).
z = np.concatenate((x, y), axis=0)
print(z, z.shape, sep="\n")

[[ 0.          1.          2.        ]
 [ 3.          4.          5.        ]
 [ 6.          7.          8.        ]
 [ 9.         10.         11.        ]
 [ 0.04148076  0.25320189  0.96329368]
 [-1.17026074  0.68099719  0.21367633]
 [-1.52833829  2.26452054 -0.92049164]
 [-0.31694742  0.70464917  0.81967559]]
(8, 3)


In [107]:
# np.vstack stacks the arrays vertically; same result as concatenating with axis=0.
z = np.vstack((x, y))
print(z, z.shape, sep="\n")

[[ 0.          1.          2.        ]
 [ 3.          4.          5.        ]
 [ 6.          7.          8.        ]
 [ 9.         10.         11.        ]
 [ 0.04148076  0.25320189  0.96329368]
 [-1.17026074  0.68099719  0.21367633]
 [-1.52833829  2.26452054 -0.92049164]
 [-0.31694742  0.70464917  0.81967559]]
(8, 3)


### concatenate by column

In [104]:
# axis=1 joins along the column axis: y is placed to the right of x, so the column counts add (3 + 3).
z = np.concatenate((x, y), axis=1)
print(z, z.shape, sep="\n")

[[ 0.          1.          2.          0.04148076  0.25320189  0.96329368]
 [ 3.          4.          5.         -1.17026074  0.68099719  0.21367633]
 [ 6.          7.          8.         -1.52833829  2.26452054 -0.92049164]
 [ 9.         10.         11.         -0.31694742  0.70464917  0.81967559]]
(4, 6)


In [108]:
# np.hstack stacks the arrays horizontally; same result as concatenating with axis=1.
z = np.hstack((x, y))
print(z, z.shape, sep="\n")

[[ 0.          1.          2.          0.04148076  0.25320189  0.96329368]
 [ 3.          4.          5.         -1.17026074  0.68099719  0.21367633]
 [ 6.          7.          8.         -1.52833829  2.26452054 -0.92049164]
 [ 9.         10.         11.         -0.31694742  0.70464917  0.81967559]]
(4, 6)
