In [1]:
import numpy as np

## Arrays: initialize

numpy.ndarray: N-dimensional array 

In [2]:
# Build a 1-D array from a Python list and inspect its type, dtype and shape.
a = np.array([1, 2, 3])

print(f'a : {a}')
print(f'type : {type(a)}')
print(f'dtype : {a.dtype}')
print(f'shape : {a.shape}')
# Elements are read with zero-based integer indices.
print(a[0], a[1], a[2])

a : [1 2 3]
type : <class 'numpy.ndarray'>
dtype : int64
shape : (3,)
1 2 3


In [3]:
# Arrays are mutable: assigning to an index overwrites that element in place.
a[0] = 5

print(a)

[5 2 3]


In [4]:
# A nested list produces a 2-D array; here 2 rows by 3 columns.
b = np.array([[1,2,3],[4,5,6]])

print(f'b :\n{b}')
print(f'shape : {b.shape}')
# 2-D elements are addressed as b[row, column].
print(b[0, 0], b[0, 1], b[1, 0])

b :
[[1 2 3]
 [4 5 6]]
shape : (2, 3)
1 2 4


In [5]:
# 2x2 array filled with zeros; the default element type is float, hence "0." in the output.
np.zeros((2,2))

array([[0., 0.],
       [0., 0.]])

In [6]:
# 1x2 array filled with ones.
np.ones((1,2))

array([[1., 1.]])

In [7]:
# Integers from 0 up to, but not including, 5.
np.arange(5)

array([0, 1, 2, 3, 4])

In [8]:
# A list containing only integers yields an integer array.
np.array([1, 2, 3])

array([1, 2, 3])

In [9]:
# Unlike the all-integer array above, a single float in the input makes the whole
# array float, so 2 is displayed as "2." instead of "2".
np.array([1.1, 2, 3])

array([1.1, 2. , 3. ])

## dtype

In [10]:
# The dtype is inferred from the inputs: mixing a float with integers gives float64.
a = np.array([1.12, 2, 3])

print(f'a : {a}')
print(f'dtype : {a.dtype}')

a : [1.12 2.   3.  ]
dtype : float64


In [11]:
# Request the element type explicitly: the integer inputs are stored as float64.
# The old `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24,
# so the concrete type np.float64 is used instead.
a = np.array([1, 2, 3], dtype=np.float64)

print(f'a : {a}')
print(f'dtype : {a.dtype}')

a : [1. 2. 3.]
dtype : float64


In [12]:
# Sometimes an array must hold integers. For example, an array that stores
# indices cannot be used for indexing or slicing if it contains floats.
# The old `np.int` alias was removed in NumPy 1.24, so np.int64 is used instead.
zeros_float = np.zeros((2, 2))
zeros_int = np.zeros((2, 2), dtype=np.int64)
print(zeros_float)
print(zeros_int)

[[0. 0.]
 [0. 0.]]
[[0 0]
 [0 0]]


In [13]:
# Building an integer array from floats drops the fractional part (1.1 becomes 1).
# The old `np.int` alias was removed in NumPy 1.24, so np.int64 is used instead.
truncated = np.asarray([1.1, 2, 3], dtype=np.int64)
truncated

array([1, 2, 3])

## random

In [14]:
# 3x2 array of random floats drawn uniformly from [0, 1).
np.random.random_sample((3,2))

array([[0.11103257, 0.01034779],
       [0.85918842, 0.79929568],
       [0.54344155, 0.67885097]])

In [15]:
# Random integers from the half-open range [low, high): `high` itself is never returned.
np.random.randint(low=0, high=4, size=(5,2))

array([[3, 1],
       [0, 3],
       [3, 3],
       [1, 2],
       [2, 0]])

In [16]:
# Given an integer n, permutation() returns the integers 0..n-1 in random order.
np.random.permutation(5)

array([3, 1, 4, 0, 2])

In [17]:
# Given an array, permutation() returns a shuffled copy; here of the values 2..6.
np.random.permutation(2 + np.arange(5))

array([5, 2, 6, 3, 4])

## reshape

In [18]:
# 6x2 array of random floats (12 values in total) used by the reshape examples.
x = np.random.random_sample(size=(6, 2))

print(x)

[[0.27499104 0.54316798]
 [0.15497349 0.06642739]
 [0.56163552 0.5277631 ]
 [0.04997475 0.59105566]
 [0.54594774 0.74653601]
 [0.93401443 0.73149026]]


In [19]:
# (1, all): -1 lets NumPy infer that dimension from the total size,
# so the 12 elements become a single row of shape (1, 12).
print(x.reshape(1,-1).shape)
print(x.reshape(1,-1))

(1, 12)
[[0.27499104 0.54316798 0.15497349 0.06642739 0.56163552 0.5277631
  0.04997475 0.59105566 0.54594774 0.74653601 0.93401443 0.73149026]]


In [20]:
# (all, 1): the inferred dimension comes first, giving a single column of shape (12, 1).
print(x.reshape(-1,1).shape)
print(x.reshape(-1,1))

(12, 1)
[[0.27499104]
 [0.54316798]
 [0.15497349]
 [0.06642739]
 [0.56163552]
 [0.5277631 ]
 [0.04997475]
 [0.59105566]
 [0.54594774]
 [0.74653601]
 [0.93401443]
 [0.73149026]]


In [21]:
# (all,): a lone -1 flattens the array to 1-D, shape (12,).
print(x.reshape(-1).shape)
print(x.reshape(-1))

(12,)
[0.27499104 0.54316798 0.15497349 0.06642739 0.56163552 0.5277631
 0.04997475 0.59105566 0.54594774 0.74653601 0.93401443 0.73149026]


In [22]:
# (n_rows, n_cols): both dimensions given explicitly; 2 * 6 matches the 12 elements.
print(x.reshape(2,6).shape)
print(x.reshape(2,6))

(2, 6)
[[0.27499104 0.54316798 0.15497349 0.06642739 0.56163552 0.5277631 ]
 [0.04997475 0.59105566 0.54594774 0.74653601 0.93401443 0.73149026]]


In [23]:
# Three dimensions give a rank-3 tensor; 2 * 2 * 3 matches the 12 elements.
print(x.reshape(2,2,3).shape)
print(x.reshape(2,2,3))

(2, 2, 3)
[[[0.27499104 0.54316798 0.15497349]
  [0.06642739 0.56163552 0.5277631 ]]

 [[0.04997475 0.59105566 0.54594774]
  [0.74653601 0.93401443 0.73149026]]]


In [24]:
# The requested shape must divide the total size evenly: 12 elements cannot be
# arranged into 7 rows, so reshape raises ValueError.
# The error is caught and printed so that "Restart & Run All" can continue past
# this cell while still showing the message.
try:
    x.reshape(7, -1)
except ValueError as err:
    print(f'ValueError: {err}')

ValueError: cannot reshape array of size 12 into shape (7,newaxis)

## slicing

In [25]:
# 3x4 integer array holding 0..11, used by the slicing examples.
a = np.arange(12).reshape((3, 4))

print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [26]:
# First two rows and first three columns (slice end indices are exclusive).
a[:2,:3]

array([[0, 1, 2],
       [4, 5, 6]])

In [27]:
# An integer index selects row 1 and the slice 1:3 keeps columns 1 and 2,
# so the result is 1-D.
b = a[1,1:3]

print(b)

[5 6]


In [28]:
# Integer-array indexing: the two index arrays are paired element by element,
# selecting a[0,0], a[2,3], a[0,2] and a[1,1].
rows = np.asarray([0, 2, 0, 1])
cols = np.asarray([0, 3, 2, 1])
a[rows,cols]

array([ 0, 11,  2,  5])

In [29]:
# Flatten to 1-D, then slice with negative indices: from the third-from-last
# element up to, but not including, the last one.
c = a.reshape(-1)
c[-3:-1]

array([ 9, 10])

## Boolean and where

In [30]:
# Element-wise test: a boolean array of the same shape as a, True where the value is even.
a % 2 == 0

array([[ True, False,  True, False],
       [ True, False,  True, False],
       [ True, False,  True, False]])

In [31]:
# Indexing with a boolean mask returns the elements where the mask is True as a 1-D array.
a[ (a % 2 == 0) ]

array([ 0,  2,  4,  6,  8, 10])

In [32]:
# Called with only a condition, np.where returns one index array per axis
# (row indices, then column indices) of the elements that satisfy it.
np.where(a > 5)

(array([1, 1, 2, 2, 2, 2]), array([2, 3, 0, 1, 2, 3]))

In [33]:
# rows and cols are read in lockstep: each pair (i, j) selects the value a[i, j].
rows, cols = np.where(a > 5)
a[rows,cols]

array([ 6,  7,  8,  9, 10, 11])

## Array math

In [34]:
# Two 2x2 float matrices and a length-2 vector used by the arithmetic examples.
x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)
u = np.array([0.1, 0.5])

# end='\n\n' leaves a blank line between the printed arrays.
print(x, end='\n\n')
print(y, end='\n\n')
print(u)

[[1. 2.]
 [3. 4.]]

[[5. 6.]
 [7. 8.]]

[0.1 0.5]


In [35]:
# Element-wise addition.
x + y

array([[ 6.,  8.],
       [10., 12.]])

In [36]:
# Element-wise subtraction.
x - y

array([[-4., -4.],
       [-4., -4.]])

In [37]:
# Element-wise multiplication (not a matrix product).
x * y

array([[ 5., 12.],
       [21., 32.]])

In [38]:
# Element-wise division.
x / y

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [39]:
# Matrix product: each entry is a row of x dotted with a column of y.
np.dot(x, y)

array([[19., 22.],
       [43., 50.]])

In [40]:
# Element-wise square.
x ** 2

array([[ 1.,  4.],
       [ 9., 16.]])

In [41]:
# Function form of x ** 2; gives the same element-wise result.
np.power(x, 2)

array([[ 1.,  4.],
       [ 9., 16.]])

In [42]:
# Element-wise exponential e**x.
np.exp(x)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [43]:
# Element-wise square root.
np.sqrt(x)

array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

In [44]:
# Multiplying an (n, m) array by a 1-D array of length m with "*" works
# column by column: column j of x is scaled by u[j].
print(x)
print(u)
x * u

[[1. 2.]
 [3. 4.]]
[0.1 0.5]


array([[0.1, 1. ],
       [0.3, 2. ]])

In [45]:
# Swapping the operands gives the same result.
u * x

array([[0.1, 1. ],
       [0.3, 2. ]])

In [46]:
# To multiply row by row instead, add numpy.newaxis as the second axis of the
# 1-D array: u becomes a column of shape (2, 1) and row i of x is scaled by u[i].
x * u[:,np.newaxis]

array([[0.1, 0.2],
       [1.5, 2. ]])

In [47]:
# The transpose turns an (n, m) matrix into an (m, n) matrix.
x.T

array([[1., 3.],
       [2., 4.]])

In [48]:
# Alternative to newaxis: transpose, scale the columns by u, transpose back.
# Row i of x ends up scaled by u[i].
(x.T * u).T

array([[0.1, 0.2],
       [1.5, 2. ]])

In [49]:
# Without an axis argument, mean() averages over every element of the array.
print(x)
x.mean()

[[1. 2.]
 [3. 4.]]


2.5

In [50]:
# axis=0 runs down the rows and axis=1 runs across the columns.
# Reducing over axis=0 therefore collapses the rows, giving one mean per column.
x.mean(axis=0)

array([2., 3.])

In [51]:
# Reducing over axis=1 collapses the columns, giving one mean per row.
x.mean(axis=1)

array([1.5, 3.5])

In [52]:
# Sum of each row.
x.sum(axis=1)

array([3., 7.])

In [53]:
# Standard deviation of each row.
x.std(axis=1)

array([0.5, 0.5])

## broadcasting

In [54]:
# x is an integer (4, 3) array; y is a float vector with one entry per column of x.
x = np.arange(12).reshape((4, 3))
y = np.array([1.1, 0.0, 2.0])

print(x)
print(y)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[1.1 0.  2. ]


In [55]:
# Add the vector y to every row of x, one row at a time.
# x has an integer dtype, so each float sum is truncated when it is written
# back into x: 0 + 1.1 is stored as 1, not 1.1.
for i in range(x.shape[0]):
    x[i,:] = x[i,:] + y

print(x)

[[ 1  1  4]
 [ 4  4  7]
 [ 7  7 10]
 [10 10 13]]


In [56]:
# With a float array nothing is truncated when y is added.
# The old `np.float` alias was removed in NumPy 1.24, so np.float64 is used instead.
x = np.arange(12, dtype=np.float64).reshape(4,3)

# Broadcasting: y has shape (3,), matching the last axis of x (shape (4, 3)),
# so NumPy adds it to every row without an explicit Python loop.
# The result is the same as adding y to x[i, :] for each row i.
x += y

print(x)

[[ 1.1  1.   4. ]
 [ 4.1  4.   7. ]
 [ 7.1  7.  10. ]
 [10.1 10.  13. ]]


## sorting

In [57]:
# Six random floats used by the sorting examples.
x = np.random.random_sample(size=(6,))

print(x)

[0.58580021 0.86388498 0.42526309 0.63746234 0.24761122 0.17345946]


In [58]:
# `argsort()` returns the indices that would sort the array;
# x itself is left unchanged, as the second print shows.
print(x.argsort())
print(x)

[5 4 2 0 3 1]
[0.58580021 0.86388498 0.42526309 0.63746234 0.24761122 0.17345946]


In [59]:
# `sort()` modifies the array in place.
x.sort()

print(x)
# x is now in ascending order, so argsort() returns 0, 1, 2, ...
print(x.argsort())

[0.17345946 0.24761122 0.42526309 0.58580021 0.63746234 0.86388498]
[0 1 2 3 4 5]


In [60]:
# Draw six fresh random floats and show them.
x = np.random.random_sample(size=(6,))

print(x)

# argsort orders the positions from smallest to largest value; applying that
# order and keeping the first three entries gives the three smallest values.
sorted_indices = np.argsort(x)
x[sorted_indices][:3]

[0.52639975 0.37617311 0.38294373 0.35272178 0.59852475 0.17770019]


array([0.17770019, 0.35272178, 0.37617311])

In [61]:
# Rearrange the six values into a (3, 2) array.
x = x.reshape(3,2)

print(x)
# axis=0 sorts within each column; axis=1 sorts within each row.
print(x.argsort(axis=0))
print(x.argsort(axis=1))

[[0.52639975 0.37617311]
 [0.38294373 0.35272178]
 [0.59852475 0.17770019]]
[[1 2]
 [0 1]
 [2 0]]
[[1 0]
 [1 0]
 [1 0]]


## distance

In [62]:
from sklearn.metrics import pairwise_distances

# 100 reference points and 2 query points, each with 5 features.
x = np.random.random_sample((100,5))
y = np.random.random_sample((2,5))

# Euclidean distances between the points of y and the points of x; the result
# has one row per point in y and one column per point in x.
dist = pairwise_distances(y, x, metric='euclidean')
dist.shape

(2, 100)

In [63]:
# Number of nearest neighbours to keep per query point.
k = 3
# argsort along axis=1 orders the column indices of each row by increasing
# distance; the first k columns are the indices of the k closest points.
topk_indices = dist.argsort(axis=1)[:,:k]

print(topk_indices)
print(topk_indices.shape)

[[ 0 50 49]
 [69 43 90]]
(2, 3)


In [64]:
# Repeat each query row index k times so that it lines up, element by element,
# with the flattened column indices of that row's k nearest neighbours.
rows = np.repeat(np.arange(y.shape[0]), k)
cols = topk_indices.flatten()

print(rows)
print(cols)

[0 0 0 1 1 1]
[ 0 50 49 69 43 90]


In [65]:
# Pick dist[row, col] for every (row, col) pair, then regroup the flat result
# into one row of k distances per query point.
topk_dist = dist[rows, cols].reshape(-1, k)
topk_dist

array([[0.10634553, 0.25154972, 0.40086118],
       [0.2058928 , 0.26466561, 0.37768934]])

## unique & bincount

In [66]:
# 20 random integers drawn from 0, 1, 2, 3, 4 (the upper bound 5 is exclusive).
x = np.random.randint(low=0, high=5, size=20)
x

array([0, 3, 2, 3, 3, 0, 0, 1, 2, 4, 2, 2, 1, 0, 0, 3, 0, 1, 1, 2])

In [67]:
# Sorted distinct values present in x.
np.unique(x)

array([0, 1, 2, 3, 4])

In [68]:
# Count how many times each non-negative integer value occurs in x;
# position i of the result is the number of occurrences of the value i.
# NOTE(review): bincount already returns max(x) + 1 bins, and the number of
# distinct values can never exceed that, so this minlength has no effect.
# If a fixed number of bins is wanted, pass the known number of categories instead.
n_unique = np.unique(x).shape[0]
np.bincount(x, minlength=n_unique)

array([6, 4, 5, 4, 1])

## Read more

- Official NumPy reference: https://numpy.org/doc/stable/reference/
- Stanford Univ. CS231n tutorial: http://cs231n.github.io/python-numpy-tutorial/