# NumPy ndarray: 多维数组对象

In [2]:
import numpy as np

In [3]:
# Generate a 2x3 array of random values
data = np.random.randn(2,3)
data

array([[ 1.72147199,  0.52723103,  1.22733046],
       [-0.53278477,  1.72838235,  1.27616024]])

In [4]:
data * 10

array([[17.21471988,  5.2723103 , 12.27330463],
       [-5.32784768, 17.28382352, 12.76160239]])

In [5]:
data + data

array([[ 3.44294398,  1.05446206,  2.45466093],
       [-1.06556954,  3.4567647 ,  2.55232048]])

In [6]:
data.shape

(2, 3)

In [7]:
data.dtype

dtype('float64')

## 生成ndarray

In [8]:
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [9]:
data2 =[[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [10]:
arr2.ndim

2

In [11]:
arr2.shape

(2, 4)

In [12]:
arr1.dtype

dtype('float64')

In [13]:
arr2.dtype

dtype('int64')

In [14]:
# Create an array filled with zeros in a single call
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [15]:
np.zeros((2,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [16]:
# Create an array filled with ones in a single call
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [17]:
np.ones((2,6,3))

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]])

In [18]:
# np.arange(2, 10): consecutive integers starting at 2 and stopping before 10
np.arange(2,10)

array([2, 3, 4, 5, 6, 7, 8, 9])

In [19]:
data3 = (1,2,3,4)
arr3 = np.array(data3)
arr3

array([1, 2, 3, 4])

In [20]:
# NOTE: a set is not treated as a sequence here -- the whole set is wrapped in a
# single-element (0-d) array with dtype=object rather than becoming a numeric array.
# Pass a list or tuple (e.g. sorted(the_set)) when an integer array is wanted.
np.asarray({1,2,34})

array({1, 2, 34}, dtype=object)

In [21]:
np.ones_like(data3)

array([1, 1, 1, 1])

In [22]:
# astype converts an array to a different data type

In [23]:
arr = np.array([1,2,3,4,5])
arr.dtype

dtype('int64')

In [24]:
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

In [25]:
arr = np.array([3.7,-1.2,-2.6,0.5,12.9,10.1])
arr.astype(np.int32)   
# Casting floats to an integer dtype drops the fractional part (values are truncated toward zero, not rounded)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

## NumPy数组计算

In [26]:
arr = np.array([[1.,2.,3.],[4.,5.,6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [27]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [28]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [29]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [30]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [31]:
# Note: this rebinds arr2 (previously a 2x4 array) to a 2x3 array matching arr's shape.
arr2 = np.array([[0,4,1],[7,2,12]])
# Comparing two same-shaped arrays is done element by element and yields a boolean array
arr > arr2

array([[ True, False,  True],
       [False,  True, False]])

## 基础索引与切片

In [32]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
arr[5]

5

In [34]:
arr[5:8]

array([5, 6, 7])

In [35]:
# Assigning a scalar to a slice writes that value to every selected element, modifying arr in place
arr [5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [36]:
# A bare slice [:] refers to every element of the array

In [37]:
arr [:] = 64
arr

array([64, 64, 64, 64, 64, 64, 64, 64, 64, 64])

In [38]:
# To get a copy of a slice rather than a view, copy it explicitly


In [39]:
arr = np.arange(10)
arr[5:8].copy()

array([5, 6, 7])

In [40]:
# In a two-dimensional array, the element at each index is no longer a single value but a one-dimensional array
# When indexing a two-dimensional array, axis 0 can be thought of as rows and axis 1 as columns

In [41]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [42]:
arr2d[2]

array([7, 8, 9])

In [43]:
# Access a single element (index the row first, then the column)
arr2d[2][0]

7

In [44]:
arr2d[2,0]

7

In [45]:
arr2d.ndim

2

In [46]:
arr2d.shape

(3, 3)

In [47]:
# In a multidimensional array the trailing indices may be omitted; the result is a lower-dimensional array.
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [48]:
arr3d[0]  # returns a 2x3 two-dimensional array

array([[1, 2, 3],
       [4, 5, 6]])

In [49]:
arr3d[0][1,2]

6

In [50]:
# Keep a copy of the whole array so the original values can be restored after the in-place assignment
old_values = arr3d.copy()
# A scalar assigned to arr3d[0] is written to every element of that 2x3 block
arr3d[0]=42
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [51]:
# Restore the original values by rebinding the name to the saved copy
# (the modified array itself is not written back into)
arr3d = old_values

In [52]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [53]:
arr3d[0,0] 
# returns a one-dimensional array

array([1, 2, 3])

## 数组的切片索引

In [54]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [55]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [56]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
# Slicing a two-dimensional array slices along axis 0; arr2d[:2] is the first two rows of arr2d
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [58]:
# Several slices can be passed at once, just like several indices: here axis 0 is sliced first, then axis 1
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [59]:
arr2d[1,:2]

array([4, 5])

In [60]:
arr2d[:2,2]

array([3, 6])

In [61]:
arr2d[:,:1]

array([[1],
       [4],
       [7]])

## 布尔索引

In [62]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])

In [63]:
data = np.random.randn(7,4)

In [64]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [65]:
data

array([[-0.11850504, -1.64796629,  0.13409649, -0.14347568],
       [ 0.38609691,  1.75260387,  0.97945796,  1.20226051],
       [ 0.58226821, -0.0486393 , -0.18906233, -2.62796597],
       [ 1.00538466,  0.32921083,  0.63396736, -1.69348457],
       [ 0.61652128,  0.06917782, -1.18331913,  1.04962395],
       [-1.03080639, -0.26655422,  1.11637632,  1.67046081],
       [-0.49449839,  0.47730972,  0.56155555, -0.32219848]])

In [66]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [67]:
data[names=='Bob']

array([[-0.11850504, -1.64796629,  0.13409649, -0.14347568],
       [ 1.00538466,  0.32921083,  0.63396736, -1.69348457]])

In [68]:
# The boolean array must have the same length as the axis it indexes; it can also be combined with slices or integer indices

In [69]:
data[names=='Bob',2:]

array([[ 0.13409649, -0.14347568],
       [ 0.63396736, -1.69348457]])

In [70]:
data[names=='Bob',3]

array([-0.14347568, -1.69348457])

In [71]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [72]:
# Put ~ in front of a boolean condition to invert it.
# Here the inverted condition is (names != 'Bob'), so the double negation selects the 'Bob' rows again.
data[~(names != 'Bob')]

array([[-0.11850504, -1.64796629,  0.13409649, -0.14347568],
       [ 1.00538466,  0.32921083,  0.63396736, -1.69348457]])

In [73]:
cond = names == 'Bob'
data[~cond]

array([[ 0.38609691,  1.75260387,  0.97945796,  1.20226051],
       [ 0.58226821, -0.0486393 , -0.18906233, -2.62796597],
       [ 0.61652128,  0.06917782, -1.18331913,  1.04962395],
       [-1.03080639, -0.26655422,  1.11637632,  1.67046081],
       [-0.49449839,  0.47730972,  0.56155555, -0.32219848]])

In [74]:
# Boolean arrays are combined with the operators & (and) and | (or)

In [75]:
mask = (names == 'Bob') | (names == 'Will')

In [76]:
data[mask]

array([[-0.11850504, -1.64796629,  0.13409649, -0.14347568],
       [ 0.58226821, -0.0486393 , -0.18906233, -2.62796597],
       [ 1.00538466,  0.32921083,  0.63396736, -1.69348457],
       [ 0.61652128,  0.06917782, -1.18331913,  1.04962395]])

In [77]:
# Boolean-mask assignment: every negative entry of data is set to 0 in place
data[data<0]=0
data

array([[0.        , 0.        , 0.13409649, 0.        ],
       [0.38609691, 1.75260387, 0.97945796, 1.20226051],
       [0.58226821, 0.        , 0.        , 0.        ],
       [1.00538466, 0.32921083, 0.63396736, 0.        ],
       [0.61652128, 0.06917782, 0.        , 1.04962395],
       [0.        , 0.        , 1.11637632, 1.67046081],
       [0.        , 0.47730972, 0.56155555, 0.        ]])

In [78]:
# A one-dimensional boolean array selects whole rows: every row whose name is 'Joe' is set to 7
data[names =='Joe']=7
data

array([[0.        , 0.        , 0.13409649, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.58226821, 0.        , 0.        , 0.        ],
       [1.00538466, 0.32921083, 0.63396736, 0.        ],
       [0.61652128, 0.06917782, 0.        , 1.04962395],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ]])

## 神奇索引: 用于描述使用整数数组进行数据索引

In [79]:
arr  = np.empty((8,4))

In [80]:
for i in range(8):
    arr[i] = i 

In [81]:
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [82]:
# To select a subset of rows in a particular order, pass a list or array of integers specifying the desired order:
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [83]:
# Negative indices select rows counting from the end:
arr[[-3,-5,-7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [84]:
arr = np.arange(32).reshape((8,4))

In [85]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [86]:
# Two index lists are paired element by element: this picks (1,0), (5,2), (7,1) and (2,2),
# giving a one-dimensional result rather than a rectangular block
arr[[1,5,7,2],[0,2,1,2]]

array([ 4, 22, 29, 10])

In [87]:
arr[[1,5,7,2]]

array([[ 4,  5,  6,  7],
       [20, 21, 22, 23],
       [28, 29, 30, 31],
       [ 8,  9, 10, 11]])

In [88]:
# Select rows 1, 5, 7, 2 first, then reorder the columns of that result as 0, 3, 1, 2
arr[[1,5,7,2]][:,[0,3,1,2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## 数组转置与换轴

In [89]:
arr = np.arange(15).reshape((3,5))

In [90]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [91]:
# Transpose: the 3x5 array becomes 5x3
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [92]:
arr = np.random.randn(6,3)
arr

array([[ 1.27585143, -0.53288751,  1.08744535],
       [ 1.20763737,  1.46302914, -0.94773258],
       [ 0.88565988, -0.04565467, -1.14127632],
       [-0.32942867, -0.34841935,  0.33229091],
       [-0.27978445, -0.04445168, -0.79226284],
       [-1.25395468,  0.42835298, -0.50702438]])

In [93]:
# Compute the matrix inner product with np.dot: arr.T (3x6) times arr (6x3) gives a 3x3 result
np.dot(arr.T,arr)

array([[ 5.62978321,  0.63656985, -0.01989897],
       [ 0.63656985,  2.73336597, -2.2116864 ],
       [-0.01989897, -2.2116864 ,  4.37841747]])

In [94]:
# For higher-dimensional arrays, transpose accepts a tuple of axis numbers used to permute the axes
arr = np.arange(16).reshape((2,2,4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [95]:
arr.transpose((1,0,2))
# The axes are reordered: the original second axis becomes the first and the original first axis becomes the second (the last axis is unchanged)

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [96]:
# Transposing with .T is a special case of swapping axes; ndarray also has a swapaxes method that takes a pair of axis numbers and swaps those two axes to rearrange the data:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [97]:
# Swap axes 1 and 2: the (2, 2, 4) array is shown as two 4x2 blocks, i.e. shape (2, 4, 2)
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])