In [1]:
import numpy as np

## Part1. numpy array的基本操作: reshape, join, split, delete, squeeze

### I. reshape

#### I.1 用A.reshape(dim1, dim2, ..., dimN)
   - reshape前后的size要一样，二维条件下size = row * column
   - reshape返回一个新的array对象（在可能的情况下是与原array共享数据的view，否则是copy），原array的形状不变
   - 一维numpy array的shape是(n,)，在broadcast规则中，会被扩展为(1, n)。但它不同于二维shape取(1, n)的场景。

In [2]:
# a is a 1-D array of shape (6,); b holds the same values as a 2-D row of shape (1, 6).
a = np.arange(6)
b = np.arange(6).reshape(1, -1)  # -1 lets NumPy infer that dimension from the size
print(a, b)
print(a.shape, b.shape)

[0 1 2 3 4 5] [[0 1 2 3 4 5]]
(6,) (1, 6)


In [3]:
# The reshaped result is discarded here, so displaying b afterwards
# shows that b itself still has shape (1, 6).
b.reshape(2, 3)
b

array([[0, 1, 2, 3, 4, 5]])

#### I.2 A.resize(dim1, dim2, ..., dimN)直接改变原array的形状

In [4]:
# resize changes the shape of b itself: b is now displayed as (2, 3).
b.resize(2, 3)
b

array([[0, 1, 2],
       [3, 4, 5]])

#### I.3 A.ravel()和A.flatten()
 - 功能都是将矩阵flatten。区别是ravel得到的是原始array的View，flatten是copy

In [5]:
# Flatten b in two ways, then write through the flatten() result.
c = b.ravel()
d = b.flatten()
d[0] = 100  # b is unchanged in the output, i.e. d does not share data with b
b, d

(array([[0, 1, 2],
        [3, 4, 5]]),
 array([100,   1,   2,   3,   4,   5]))

In [6]:
# Writing through the ravel() result also changes b here, i.e. c shares data with b.
c[0] = 100
b, c

(array([[100,   1,   2],
        [  3,   4,   5]]),
 array([100,   1,   2,   3,   4,   5]))

In [49]:
# a and b are two different reshapes derived from the same np.arange(12) data;
# the check below shows that they report the same base object.
a = np.arange(12).reshape(3, 2, 2)
b = a.reshape(2, 2, 3)
a.base is b.base

True

In [47]:
# Print the flattened contents of both arrays for comparison.
print(a.flatten())
print(b.flatten())

[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


#### I.4 expand dims
1. np.newaxis:让矩阵增加一个维度，但没有新增元素时，用None或者它的alias：np.newaxis
2. np.expand_dims(array, axis)，这里axis参数可以是tuple，此时同时扩展多个dims
- 他们都不改变原array的形状

In [7]:
# Using np.newaxis: it is the very same object as None
np.newaxis is None

True

In [8]:
# 1-D sample array of shape (3,)
x = np.arange(3)
x.shape, x

((3,), array([0, 1, 2]))

In [9]:
x[np.newaxis, :] # equivalent to np.expand_dims(x, axis=0)

array([[0, 1, 2]])

In [10]:
x[:, np.newaxis, None] # None is np.newaxis, so this appends two length-1 dimensions

array([[[0]],

       [[1]],

       [[2]]])

In [11]:
# When axis is a tuple, several dimensions are inserted at once
y = np.expand_dims(x, axis=(0, 1))
y.shape, y

((1, 1, 3), array([[[0, 1, 2]]]))

In [12]:
# The new length-1 axes land at positions 0 and 2 of the result: shape (1, 3, 1)
z = np.expand_dims(x, axis=(0, 2))
z.shape, z

((1, 3, 1),
 array([[[0],
         [1],
         [2]]]))

### II. joining

#### II.1 stacking
 - np.vstack(): stacking发生在第一个维度上(first axis)；1D array会先被当作shape为(1, n)的2D array再堆叠
 - np.hstack(): stacking发生在第二个维度上(second axis)；但对1D array是沿第一个维度拼接，结果仍是1D array

In [13]:
# Two 1-D arrays; vstack places them as the rows of a 2-D array
a = np.array([4., 2.])
b = np.array([3., 8.])
np.vstack((a, b))

array([[4., 2.],
       [3., 8.]])

In [14]:
# For these 1-D inputs hstack joins them end to end into a longer 1-D array
np.hstack((a, b))

array([4., 2., 3., 8.])

In [15]:
# 3-D integer array of shape (2, 3, 2)
c = np.arange(12).reshape(2, 3, 2)
c

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]]])

In [16]:
# Array of ones with the same (2, 3, 2) shape; displayed as floats
d = np.ones((2, 3, 2))
d

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [17]:
# hstack grows axis 1 (3 -> 6); vstack grows axis 0 (2 -> 4)
np.hstack((c, d)).shape, np.vstack((c, d)).shape

((2, 6, 2), (4, 3, 2))

#### II.2 concatenate
 - np.concatenate((a, b, ...), axis=0)可以自定义stacking发生的维度
 - 如果axis=None，那么被堆叠的arrays会先被flatten，然后join，得到一个1D array
 - 如果第一个参数是list，那么concatenate会执行对其元素的合并。常见用途是np.split / np.array_split返回的是list of arrays，对它的返回值做concatenate的时候，直接是对list中的元素做合并（注意：np.delete返回的是ndarray而不是list）

In [18]:
# Join along the last axis (axis=2): the result has shape (2, 3, 4)
s = np.concatenate((c, d), axis=2)
s.shape, s

((2, 3, 4),
 array([[[ 0.,  1.,  1.,  1.],
         [ 2.,  3.,  1.,  1.],
         [ 4.,  5.,  1.,  1.]],
 
        [[ 6.,  7.,  1.,  1.],
         [ 8.,  9.,  1.,  1.],
         [10., 11.,  1.,  1.]]]))

In [19]:
# axis=None: the inputs are joined into a single 1-D array
np.concatenate((a, b), axis=None)

array([4., 2., 3., 8.])

In [20]:
# Plain nested lists are accepted as inputs as well
a = [[1, 2, 3]]
b = [[4, 5, 6]]
np.concatenate([a, b], axis=0)  # note: passing the inputs as a list '[]' or a tuple '()' gives the same result

array([[1, 2, 3],
       [4, 5, 6]])

### III. splitting and deleting

#### III.1 split：np.vsplit(), np.hsplit(), np.split()
1. 三种method分割的维度不同
   - np.vsplit(array, indices or section): 在axis=0上分割。
   - np.hsplit(array, indices or section): 在axis=1上分割。
   - np.split(array, indices or section, axis=0): 对应前面的concatenate，自定义分割的维度
   - np.array_split(array, indices or section, axis=0): 和split功能基本一样，只是array指定的axis的length l不要求整除第二个参数的值，比如n。分割得到n份，前l % n份的size是l // n + 1，后面的是l // n。<font color=red>注，这里分法有点奇怪，是先每份给l // n,再把余数l % n分给前l % n份。</font>
2. 如果indices or section参数是1个整数n，分为n份，此时要确保被分割的axis的长度(array.shape[axis])能被n整除；如果是1D array，那么该array中的数字就是分割位置
3. 都返回由分割后的sub-array组成的list
4. 新得到的array的维度数量ndim和原array一样，不会减少维度。

In [21]:
# 3-D array of shape (2, 4, 3)
x = np.arange(24).reshape(2, 4, 3)
x.shape, x

((2, 4, 3),
 array([[[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8],
         [ 9, 10, 11]],
 
        [[12, 13, 14],
         [15, 16, 17],
         [18, 19, 20],
         [21, 22, 23]]]))

In [22]:
# vsplit: split x into 2 equal parts along axis 0
v = np.vsplit(x, 2)
for i in v:
    print(i.shape)  # each part is still 3-D: (1, 4, 3)
print(v)

(1, 4, 3)
(1, 4, 3)
[array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]]]), array([[[12, 13, 14],
        [15, 16, 17],
        [18, 19, 20],
        [21, 22, 23]]])]


In [23]:
# hsplit: split x into 2 equal parts along axis 1
h = np.hsplit(x, 2)
for i in h:
    print(i.shape)  # each part has shape (2, 2, 3)
print(h)

(2, 2, 3)
(2, 2, 3)
[array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[12, 13, 14],
        [15, 16, 17]]]), array([[[ 6,  7,  8],
        [ 9, 10, 11]],

       [[18, 19, 20],
        [21, 22, 23]]])]


In [24]:
# A tuple of indices splits at those positions along axis 1: [:1], [1:3], [3:]
h_neq = np.hsplit(x, (1, 3))
for i in h_neq:
    print(i.shape)
print(h_neq)

(2, 1, 3)
(2, 2, 3)
(2, 1, 3)
[array([[[ 0,  1,  2]],

       [[12, 13, 14]]]), array([[[ 3,  4,  5],
        [ 6,  7,  8]],

       [[15, 16, 17],
        [18, 19, 20]]]), array([[[ 9, 10, 11]],

       [[21, 22, 23]]])]


In [25]:
# split vs. array_split
# array_split is more flexible, so it is the better fit for cutting cross-validation folds
x = np.arange(8.0)
np.array_split(x, 3)  # 8 is not divisible by 3, so np.split(x, 3) would raise an error

[array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])]

In [26]:
# 9 elements into 4 parts: the first part receives the extra element (sizes 3, 2, 2, 2)
x = np.arange(9)
np.array_split(x, 4)

[array([0, 1, 2]), array([3, 4]), array([5, 6]), array([7, 8])]

#### III.2 delete
 - np.delete(array_name, index, axis=None)删除axis指定维度上，由index指定位置的data；axis默认为None，此时index作用在flatten之后的array上
 - 返回一个新的array（copy），原array不会被修改

In [27]:
# Remove the elements at positions 0, 2 and 4 of the 1-D array [1..6]
arr = np.arange(6) + 1
np.delete(arr, [0,2,4], axis=0)

array([2, 4, 6])

In [28]:
# The effect is equivalent to selecting with a boolean mask, as below
mask = np.ones(len(arr), dtype=bool)
mask[[0, 2, 4]] = False  # mark the positions to drop
arr, mask, arr[mask]

(array([1, 2, 3, 4, 5, 6]),
 array([False,  True, False,  True, False,  True]),
 array([2, 4, 6]))

In [29]:
# 2-D array of shape (3, 4)
a = np.arange(12).reshape(3, 4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [30]:
# axis=0: drop row 1
np.delete(a, 1, axis=0)

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])

In [31]:
# axis=1: drop column 1
np.delete(a, 1, axis=1)

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11]])

### IV. squeeze
np.squeeze(array, axis=None)的功能是将array中length为1的dim去掉

In [32]:
# Array with two length-1 dimensions: shape (1, 3, 1)
x = np.array([[[0], [1], [2]]])
x.shape

(1, 3, 1)

In [33]:
# With no axis given, both length-1 dimensions are removed
np.squeeze(x).shape

(3,)

In [34]:
# Squeeze only the specified dimension
np.squeeze(x, axis=2).shape

(1, 3)

In [35]:
# Caution: squeezing all the way down yields a 0-d array whose shape is ()
y = np.array([[1234]])
s = y.squeeze()
s.shape, s

((), array(1234))

In [36]:
# Note the shape difference between the following two arrays
a = np.array(1)   # 0-d array: shape ()
b = np.array([1]) # 1-d array: shape (1,)
a.shape, b.shape

((), (1,))

In [37]:
# a[0] would raise an error: the 0-d array a has no axis to index
a[()], b[0]  # a 0-d array is indexed with an empty tuple instead

(1, 1)