In [1]:
import os

import numpy as np
from scipy import stats

## 生成ndarray对象

### from list

In [13]:
# Build a 1-D ndarray from a plain Python list.
values = [0, 0, 1, 3, 4, 0, 4, 3]
arr = np.array(values)

In [15]:
# Length of the first axis, i.e. the value len(arr) reports.
arr.shape[0]

8

### 全0矩阵

In [22]:
# 2x2 array filled with 0.0 (float64 by default).
np.zeros(shape=(2, 2))

array([[0., 0.],
       [0., 0.]])

### 全1矩阵

In [23]:
# 2x2 array filled with 1.0 (float64 by default).
np.ones(shape=(2, 2))

array([[1., 1.],
       [1., 1.]])

In [29]:
# np.empty only allocates memory; the contents are uninitialized, so the
# displayed values are arbitrary and may differ between runs.
np.empty(shape=(2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

### 单位矩阵

In [32]:
# 3x3 identity matrix.
np.eye(N=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [33]:
# N rows and M columns: a rectangular matrix with ones on the main diagonal.
np.eye(N=3, M=5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])

In [35]:
# k=3 moves the ones to the third diagonal above the main one.
np.eye(N=3, M=5, k=3)

array([[0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.]])

#### 只有对角线上有元素的矩阵

In [66]:
# Square matrix with the given values on the main diagonal (k=0) and zeros elsewhere.
np.diag([2, 6, 3], k=0)

array([[2, 0, 0],
       [0, 6, 0],
       [0, 0, 3]])

### buildFromFunction

In [44]:
# fromfunction passes one index array per axis to the callable,
# so entry (i, j) of the result is i + j.
np.fromfunction(np.add, (2, 3))

array([[0., 1., 2.],
       [1., 2., 3.]])

### 随机数填充的矩阵

#### [0, 1)均匀分布

In [30]:
# Uniform samples on [0, 1); rand takes each dimension as a separate argument.
dims = (2, 3)
np.random.rand(*dims)

array([[0.655557  , 0.65903332, 0.65996999],
       [0.16087678, 0.07359523, 0.6661718 ]])

In [36]:
# Same distribution as rand, but the shape is passed as a single tuple.
np.random.random(size=(2, 3))

array([[0.19230652, 0.58237661, 0.13920882],
       [0.98213761, 0.65351736, 0.15617274]])

#### 标准正态分布

In [31]:
# Standard normal samples; like rand, randn takes each dimension as its own argument.
n_rows, n_cols = 2, 3
np.random.randn(n_rows, n_cols)

array([[ 0.19991434, -0.65105904, -0.07307731],
       [-1.38931621,  1.51679815, -0.0516504 ]])

In [38]:
# Normal distribution with mean 0 and standard deviation 1, shape given as a tuple.
np.random.normal(0, 1, (2, 3))

array([[ 0.29398565,  0.26853925,  1.36371783],
       [-1.14327918,  1.98972488,  0.74267586]])

#### 指定随机种子

In [45]:
# A dedicated generator with a fixed seed gives a reproducible sequence.
np.random.RandomState(14).rand(2, 3)

array([[0.51394334, 0.77316505, 0.87042769],
       [0.00804695, 0.30973593, 0.95760374]])

In [46]:
# Re-creating the generator with the same seed replays the identical numbers.
rng = np.random.RandomState(seed=14)
rng.rand(2, 3)

array([[0.51394334, 0.77316505, 0.87042769],
       [0.00804695, 0.30973593, 0.95760374]])

### 生成matrix对象

In [50]:
# 3x3 integer grid holding 0..8 in row-major order.
arr = np.arange(9).reshape(3, 3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [51]:
# Interpret the ndarray as a np.matrix object.
np.asmatrix(data=arr)

matrix([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [53]:
# Construct a np.matrix from the ndarray.
# NOTE(review): NumPy's documentation discourages np.matrix in new code — confirm it is still needed.
np.matrix(data=arr)

matrix([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

## 维度转换

### reshape

In [70]:
# 1-D array 0..7 used by the reshape examples in this section.
arr = np.arange(0, 8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [15]:
# Row-major (default) reshape; the new shape may be given as separate arguments.
arr.reshape(2, 4)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [21]:
# reshape's `order` argument controls how elements are read from `arr` and
# placed into the new shape:
#   'C' - row-major (C-style) order; this is the default
#   'F' - column-major (Fortran-style) order
#   'A' - 'F' if `arr` is Fortran-contiguous in memory, 'C' otherwise
# ('K', the in-memory order, is accepted by ravel/flatten but not by reshape.)
arr.reshape((2, 4), order='F')

array([[0, 2, 4, 6],
       [1, 3, 5, 7]])

### 打平成1维

In [17]:
# Flatten to 1-D in row-major order.
arr.ravel(order='C')

array([0, 1, 2, 3, 4, 5, 6, 7])

In [18]:
# Reshape to 2x4, then flatten back to 1-D: the round trip restores the original order.
arr.reshape(2, 4).ravel()

array([0, 1, 2, 3, 4, 5, 6, 7])

In [20]:
# Function form of ravel, applied to the 2x4 view.
np.ravel(arr.reshape(2, 4))

array([0, 1, 2, 3, 4, 5, 6, 7])

In [11]:
# `.flat` is an iterator object (numpy.flatiter), not an array.
np.arange(8).reshape(2, 4).flat

<numpy.flatiter at 0x7fc4c48a2a00>

In [13]:
# Because `.flat` is an iterator it can drive an ordinary for loop,
# visiting the elements in row-major order.
flat_iter = np.arange(8).reshape(2, 4).flat
for a in flat_iter:
    print(a)

0
1
2
3
4
5
6
7


In [12]:
# flatten() returns a new 1-D array.
np.arange(8).reshape(2, 4).flatten()

array([0, 1, 2, 3, 4, 5, 6, 7])

### 增加一个维度

In [18]:
# Display the current `arr` before an axis is added to it.
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [20]:
# -1 lets NumPy infer the row count, producing a single-column 2-D array.
arr.reshape(-1, 1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

In [72]:
# Insert a new length-1 axis at position 0.
np.expand_dims(arr, 0)

array([[0, 1, 2, 3, 4, 5, 6, 7]])

In [73]:
# Insert a new length-1 axis at position 1.
np.expand_dims(arr, 1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

In [19]:
# np.newaxis is an alias for None; indexing with it adds a length-1 axis.
arr[:, None]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

### ndarray合并

In [61]:
# Two arrays of identical shape: an integer grid and standard-normal noise.
arr1 = np.arange(8).reshape(2, 4)
arr2 = np.random.randn(*arr1.shape)

In [62]:
# The default axis=0 stacks the inputs along the first axis.
np.concatenate([arr1, arr2], axis=0)

array([[ 0.        ,  1.        ,  2.        ,  3.        ],
       [ 4.        ,  5.        ,  6.        ,  7.        ],
       [-0.20558031, -1.87406768,  0.66159744, -2.27991831],
       [ 1.80279172, -0.57716586, -0.58042856, -0.65255574]])

In [63]:
# axis=1 joins the inputs side by side.
np.concatenate([arr1, arr2], axis=1)

array([[ 0.        ,  1.        ,  2.        ,  3.        , -0.20558031,
        -1.87406768,  0.66159744, -2.27991831],
       [ 4.        ,  5.        ,  6.        ,  7.        ,  1.80279172,
        -0.57716586, -0.58042856, -0.65255574]])

In [64]:
# Vertical stack: same result as concatenating along axis 0 here.
np.vstack([arr1, arr2])

array([[ 0.        ,  1.        ,  2.        ,  3.        ],
       [ 4.        ,  5.        ,  6.        ,  7.        ],
       [-0.20558031, -1.87406768,  0.66159744, -2.27991831],
       [ 1.80279172, -0.57716586, -0.58042856, -0.65255574]])

In [65]:
# Horizontal stack: same result as concatenating along axis 1 here.
np.hstack([arr1, arr2])

array([[ 0.        ,  1.        ,  2.        ,  3.        , -0.20558031,
        -1.87406768,  0.66159744, -2.27991831],
       [ 4.        ,  5.        ,  6.        ,  7.        ,  1.80279172,
        -0.57716586, -0.58042856, -0.65255574]])

## 赋值

In [2]:
# Fresh 2x4 grid to demonstrate slice assignment.
arr = np.arange(8).reshape(2, 4)
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [3]:
# Overwrite the third column in place; one replacement value per row.
arr[:, 2] = np.array([8, 9])

In [4]:
# Display `arr` after the column assignment.
arr

array([[0, 1, 8, 3],
       [4, 5, 9, 7]])

## 描述性统计

In [None]:
# Other frequently used helpers, shown on a small sample so the cell runs:
# np.max / np.min / np.sum reduce over all elements by default, while
# np.sqrt / np.square are applied element by element.
demo = np.arange(8).reshape((2, 4))
np.max(demo), np.min(demo), np.sum(demo), np.sqrt(demo), np.square(demo)

In [6]:
# 2x4 grid for the descriptive statistics below; -1 lets NumPy infer the column count.
arr = np.arange(8).reshape(2, -1)
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

### mean

In [7]:
# axis=None (the default) averages over every element.
np.mean(arr, axis=None)

3.5

In [8]:
# axis=0 collapses the rows, giving one mean per column.
np.mean(a=arr, axis=0)

array([2., 3., 4., 5.])

In [9]:
# Method form of the column-wise mean; the axis may be passed positionally.
arr.mean(0)

array([2., 3., 4., 5.])

### 中位数median

In [10]:
# Median over all elements (axis=None is the default).
np.median(arr, axis=None)

3.5

In [80]:
# Define the 2x3 input for the product examples here instead of relying on
# a value of `arr1` left over from another cell.
arr1 = np.arange(6).reshape((2, 3))
arr1

array([[0, 1, 2],
       [3, 4, 5]])

### prod乘积。
* axis=None: 返回所有元素乘积
* axis=0: 返回每列的乘积

In [81]:
# axis=None (the default) multiplies every element together.
np.prod(arr1, axis=None)

0

In [82]:
# axis=0 gives one product per column.
arr1.prod(axis=0)

array([ 0,  4, 10])

### 众数

In [5]:
# Sample of small positive integers in which 3 occurs most often.
samples = [1, 9, 6, 4, 8, 5, 7, 3, 2, 8, 3, 8, 3, 9, 7, 9, 5, 1, 5, 3]
arr = np.array(samples)

In [6]:
# NumPy has no mode() function. The failure is the point of this cell, so
# catch and print it rather than letting it stop a full run of the notebook.
try:
    np.mode(arr)
except AttributeError as exc:
    print(f"AttributeError: {exc}")

AttributeError: module 'numpy' has no attribute 'mode'

#### 计算非负整数的出现次数
Count number of occurrences of each value in array of non-negative ints.

In [11]:
# Entry i of the result is how many times the value i occurs in `arr`.
np.bincount(arr, minlength=0)

array([0, 2, 1, 4, 1, 3, 1, 2, 3, 3])

In [9]:
# NumPy offers no direct mode function.
# bincount returns, for each index i, how often i occurs in `arr`; the input
# must contain non-negative integers only. E.g. the leading 0 in the result
# above means the value 0 never occurs in `arr`.
# The index of the largest count is therefore the mode.
np.bincount(arr).argmax()

3

In [10]:
# SciPy's mode reports the most frequent value and its count: 3 occurs 4 times.
# NOTE(review): newer SciPy releases may return scalars rather than length-1 arrays — confirm against the installed version.
r = stats.mode(a=arr)
r

ModeResult(mode=array([3]), count=array([4]))

In [11]:
# The most frequent value found by stats.mode.
r.mode

array([3])

In [12]:
# How many times that most frequent value occurs.
r.count

array([4])

### ndarray中非0元素的个数

In [14]:
# Use an array that actually contains zeros (three of its eight entries),
# so the count differs from the array length.
arr = np.array([0, 0, 1, 3, 4, 0, 4, 3])
np.count_nonzero(arr)

5

In [16]:
# The count does not depend on shape: the same eight values arranged as 2x4
# still contain five non-zero entries. The array is built inline so the cell
# does not depend on which `arr` happens to be in scope.
np.count_nonzero(np.array([0, 0, 1, 3, 4, 0, 4, 3]).reshape((2, 4)))

5

## elementwise运算

In [74]:
# 2x3 integer grid used by the broadcasting examples.
arr1 = np.arange(6).reshape(2, 3)
arr1

array([[0, 1, 2],
       [3, 4, 5]])

In [75]:
# A scalar is broadcast to every element.
np.subtract(arr1, 1)

array([[-1,  0,  1],
       [ 2,  3,  4]])

In [76]:
# A (2, 1) column is broadcast across the columns: row 0 loses 1, row 1 loses 2.
arr1 - np.array([1, 2]).reshape(2, 1)

array([[-1,  0,  1],
       [ 1,  2,  3]])

In [77]:
# A length-3 vector is broadcast down the rows.
arr1 - np.arange(1, 4)

array([[-1, -1, -1],
       [ 2,  2,  2]])

In [78]:
# An explicit (1, 3) row behaves the same as the 1-D vector.
arr1 - np.array([1, 2, 3])[np.newaxis, :]

array([[-1, -1, -1],
       [ 2,  2,  2]])

In [79]:
# Shapes (2, 3) and (1, 2) cannot be broadcast: the trailing sizes 3 and 2
# differ and neither is 1. The error is the point of this cell, so catch and
# print it rather than letting it stop a full run of the notebook.
try:
    arr1 - np.array([[1, 2]])
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: operands could not be broadcast together with shapes (2,3) (1,2) 

### 对应位置的元素相乘

In [6]:
# `*` on ndarrays multiplies element by element (this is not a matrix product).
np.multiply(arr1, arr1)

array([[ 0,  1,  4],
       [ 9, 16, 25]])

## 线性代数&矩阵运算

### 求逆

In [83]:
# Reproducible 3x3 matrix of standard-normal values for the linear-algebra examples.
arr = np.random.RandomState(seed=14).randn(3, 3)
arr

array([[ 1.55133911,  0.07918602,  0.17397653],
       [-0.07233657, -2.0043294 ,  0.14467781],
       [-1.50116862,  0.21110945, -0.55820506]])

In [59]:
# Matrix inverse; `arr` must be square and non-singular.
np.linalg.inv(a=arr)

array([[ 0.95314825,  0.07088078,  0.31544017],
       [-0.22558183, -0.52969762, -0.20759654],
       [-2.64859426, -0.39094625, -2.71827435]])

### 行列式

In [60]:
# Determinant of the square matrix.
np.linalg.det(a=arr)

1.1417782564454833

### norm 范数

In [84]:
# Display the matrix whose norms are computed in this section.
arr

array([[ 1.55133911,  0.07918602,  0.17397653],
       [-0.07233657, -2.0043294 ,  0.14467781],
       [-1.50116862,  0.21110945, -0.55820506]])

#### 向量的范数

In [89]:
# For a 1-D input the default norm is the 2-norm, spelled out here with ord=2.
np.linalg.norm(arr.ravel(), ord=2)

3.0160171097951203

##### 可以看出，默认参数下计算的是向量的2范数，即各元素平方和的平方根，也就是向量的模长！

In [90]:
# The 2-norm by hand: square root of the flattened vector's dot product with itself.
flat = arr.ravel()
np.sqrt(np.dot(flat, flat))

3.0160171097951203

#### 矩阵的范数

In [85]:
# For a 2-D input the default norm is the Frobenius norm, spelled out here with ord='fro'.
np.linalg.norm(arr, ord='fro')

3.0160171097951203

In [87]:
# Element-wise absolute value (np.abs is shorthand for np.absolute).
np.absolute(arr)

array([[1.55133911, 0.07918602, 0.17397653],
       [0.07233657, 2.0043294 , 0.14467781],
       [1.50116862, 0.21110945, 0.55820506]])

In [88]:
# Sum of absolute values down each column.
np.abs(arr).sum(axis=0)

array([3.1248443 , 2.29462488, 0.8768594 ])

### SVD分解

In [67]:
a = np.arange(48).reshape(12, 4)
# Singular value decomposition; full_matrices=True returns square U and Vh.
U, s, Vh = np.linalg.svd(a, full_matrices=True)

In [69]:
# Shape of U, the singular values themselves, and the shape of Vh.
(U.shape, s, Vh.shape)

((12, 12),
 array([1.88956663e+02, 3.92167338e+00, 1.34838487e-14, 9.73158362e-16]),
 (4, 4))

## ndarray数据保存和加载

### 默认情况下，数组以未压缩的原始二进制格式保存在扩展名为npy的文件中

In [None]:
# Write a single array to a binary .npy file.
np.save(file='name.npy', arr=arr)

### 加载save保存的npy文件

In [None]:
# Read the array back from the .npy file.
arr = np.load(file='name.npy')

### 将1D/2D数组写入以某种分隔符隔开的文本文件中，注意保存三维及以上的数据会报错

In [None]:
# Write a 1-D or 2-D array as delimited text.
np.savetxt(fname='name.txt', X=arr)

### 加载savetxt保存的txt文件

In [None]:
# Parse the text file back into an array.
arr = np.loadtxt(fname='name.txt')

### Save several arrays into a single file in uncompressed .npz format.

In [None]:
# np.savez does not create missing directories, so make sure tmp/ exists first.
os.makedirs('tmp', exist_ok=True)
# Each keyword becomes the name under which that array is stored in the archive.
np.savez('tmp/a.npz', arr1=arr1, arr2=arr2)

#### 或者压缩格式

In [None]:
# np.savez_compressed does not create missing directories, so make sure tmp/ exists first.
os.makedirs('tmp', exist_ok=True)
# Same archive layout as np.savez, but the stored arrays are compressed.
np.savez_compressed('tmp/a_compressed.npz', arr1=arr1, arr2=arr2)

### 加载npz文件

In [None]:
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the context manager closes it once the arrays have been read.
with np.load('tmp/a.npz') as a:
    print(a['arr1'], a['arr2'])