In [1]:
import numpy as np

In [2]:
# np.version is a submodule of numpy (its repr shows the install path), not the version string itself.
np.version

<module 'numpy.version' from '/home/disen/anaconda3/lib/python3.7/site-packages/numpy/version.py'>

In [3]:
# The installed NumPy version string.
np.__version__

'1.15.4'

## 创建ndarray对象
- 使用list列表和numpy.array()函数
- 使用numpy库中提供的相关函数

### 使用list方式创建多维数组ndarray对象

In [4]:
# Note: all elements of an ndarray share one dtype.
# When the inputs are mixed, they are converted following the priority
# int -> float -> str (lowest to highest).
arr1 = np.array([1, 2, 3], dtype="int8")
arr1

array([1, 2, 3], dtype=int8)

In [5]:
# Mixed str/int/float input: every element ends up stored as a string (see the dtype in the output).
arr2 = np.array(['disen', 20, 99.5])
arr2

array(['disen', '20', '99.5'], dtype='<U5')

In [6]:
# An ndarray can be iterated over directly, so it can be turned into a list
# element by element.
a2 = [item for item in arr2]
a2

['disen', '20', '99.5']

In [7]:
# Iterate over the array and print each element.
# A real name is used instead of `_`: in IPython `_` holds the last cell
# output, and rebinding it here would stop that shortcut from being updated.
for item in arr2:
    print(item)

disen
20
99.5


In [8]:
# Q: can an ndarray be created from a tuple? Yes - np.array accepts it,
# and the resulting array is iterable like any other.
# `element` is used rather than `_` so IPython's last-output variable `_`
# is not overwritten.
for element in np.array((1, 2, 3)):
    print(element)

1
2
3


In [9]:
# An ndarray is a multi-dimensional array object:
# it may have one, two, ... or n dimensions.
data = ((1, 2), (3, 4), (5, 6))
arr3 = np.array(data, dtype=np.int8)
arr3

array([[1, 2],
       [3, 4],
       [5, 6]], dtype=int8)

In [10]:
# Check the Python type of the object returned by np.array.
type(arr3)

numpy.ndarray

### numpy的函数创建ndarray

In [11]:
# 1. Create a 3x3 two-dimensional array in which every element is 1.
arr_1 = np.ones(shape=(3, 3), dtype=np.int8)
# The array is already int8, so it is displayed directly; an extra
# astype(np.int8) would only allocate an identical copy.
arr_1

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]], dtype=int8)

In [12]:
# 2. Create a 3x3x2 three-dimensional array in which every element is 0.
# The builtin `int` is used for the dtype: `np.int` was only an alias for it,
# was deprecated in NumPy 1.20 and removed in 1.24.
arr_2 = np.zeros((3, 3, 2), dtype=int)
arr_2

array([[[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]]])

In [13]:
# 3. Create a 4x3 two-dimensional array in which every element is 9.
arr_3 = np.full(shape=(4, 3), fill_value=9, dtype="int8")
arr_3

array([[9, 9, 9],
       [9, 9, 9],
       [9, 9, 9],
       [9, 9, 9]], dtype=int8)

In [14]:
# 4. Create a 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
arr_4 = np.eye(N=5, dtype=int)
arr_4

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

### 创建等差数列
- numpy.linspace()
- numpy.arange()

In [15]:
# 50 evenly spaced values from 2 to 100 (stop value included, which is the
# default for linspace), converted from float to int.
arr_5 = np.linspace(start=2, stop=100, num=50).astype(int)
arr_5

array([  2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,  26,
        28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,  52,
        54,  56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,
        80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100])

In [16]:
# The stop value (100) is excluded; start is written out explicitly as 0,
# which is also the default.
arr_6 = np.arange(0, 100, 2)
arr_6

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
       68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

### 随机生成ndarray
- numpy.random.random()
- numpy.random.randint()
- numpy.random.uniform()
- numpy.random.randn()
- numpy.random.normal()

In [17]:
# Values are drawn from the half-open interval [0, 1).
arr_7 = np.random.random(size=(3, 2))
arr_7

array([[0.17386005, 0.16721412],
       [0.56481191, 0.80349883],
       [0.93049688, 0.62191783]])

In [18]:
# Random integers in [0, 10) arranged as 5 rows by 3 columns.
arr_8 = np.random.randint(low=0, high=10, size=(5, 3))
arr_8

array([[7, 8, 7],
       [1, 8, 5],
       [9, 8, 6],
       [7, 5, 1],
       [9, 0, 1]])

In [19]:
# Uniformly distributed floats between 1.5 and 5.5, shape 4x5.
arr_9 = np.random.uniform(low=1.5, high=5.5, size=(4, 5))
arr_9

array([[3.42797403, 3.9864969 , 3.63696852, 3.7193078 , 5.07689835],
       [3.60565994, 1.76538573, 2.15980079, 3.19055906, 4.49343826],
       [4.25921984, 4.00469352, 3.54545752, 5.29576504, 2.23654107],
       [5.06779175, 1.85293022, 4.40840265, 4.3536503 , 4.86449621]])

In [20]:
# Can the elements of an ndarray be rounded?
# First, the builtin round() applied to a plain Python float:
round(4.55555,2)

4.56

In [21]:
# Round every element of arr_9 to two decimal places.
arr_9.round(decimals=2)

array([[3.43, 3.99, 3.64, 3.72, 5.08],
       [3.61, 1.77, 2.16, 3.19, 4.49],
       [4.26, 4.  , 3.55, 5.3 , 2.24],
       [5.07, 1.85, 4.41, 4.35, 4.86]])

In [22]:
# Samples from the standard normal distribution, arranged as a 3x2x4 array.
arr_10 = np.random.randn(3, 2, 4)
arr_10

array([[[ 2.0664208 ,  0.97144314, -0.67662217, -1.10866126],
        [-0.88274012, -2.37102288,  1.54560741,  0.66449649]],

       [[ 1.35071963,  2.81158272, -1.33756295, -0.08548935],
        [-0.78093781,  0.87080819, -0.18021601,  0.79023595]],

       [[ 0.83348759, -0.4786336 ,  0.21241187, -0.44271458],
        [ 0.45248264,  0.25504331,  0.92115202, -2.19970795]]])

In [23]:
np.random.seed(1) # Random seed: with the same seed, the generated data is identical every time
np.random.random((3, 2))

array([[4.17022005e-01, 7.20324493e-01],
       [1.14374817e-04, 3.02332573e-01],
       [1.46755891e-01, 9.23385948e-02]])

In [24]:
# Re-seeding with the same value reproduces exactly the same array as the previous cell.
np.random.seed(1) # Random seed: with the same seed, the generated data is identical every time
np.random.random((3, 2))

array([[4.17022005e-01, 7.20324493e-01],
       [1.14374817e-04, 3.02332573e-01],
       [1.46755891e-01, 9.23385948e-02]])

### ndarray对象的属性
- ndim 维度, 一个数值，如 2
- shape 形状， 是一个元组， 如 (3, )或 (3, 2)
- size  元素的总大小， 即元素的个数
- dtype 元素的数据类型

In [25]:
# Number of dimensions of arr_10.
arr_10.ndim

3

In [27]:
# display() works much like print(); passing several objects shows each one in turn:
# the shape tuple, the total element count, and the element dtype.
display(arr_10.shape, arr_10.size, arr_10.dtype)

(3, 2, 4)

24

dtype('float64')

### ndarray的索引操作
- 索引位置
    - a[下标]
    - a[行下标, 列下标]
    - a[[下标1, 下标2, ...]]
- 索引切片
    - a[起始:结束:步长]
    - a[:结束]  从0开始到指定的结束位置,步长为1
    - a[:, 下标] 选择所有行，指定的列下标

In [28]:
# Generate a grade table: 5 students (rows) by 3 courses (columns),
# each grade a random integer in [0, 100).
d = np.random.randint(low=0, high=100, size=(5, 3))
d

array([[76, 71,  6],
       [25, 50, 20],
       [18, 84, 11],
       [28, 29, 14],
       [50, 68, 87]])

In [29]:
# Grade of the 3rd student in the 2nd course (indices are 0-based: row 2, column 1).
d[2, 1]

84

In [32]:
# Grades of the 2nd, 4th and 5th students in the 1st and 3rd courses.
# np.ix_ builds an open mesh from the row and column index lists, so the
# sub-table is selected in a single indexing step instead of first copying
# the three rows and then indexing that temporary array again.
d[np.ix_([1, 3, 4], [0, 2])]

array([[25, 20],
       [28, 14],
       [50, 87]])

In [33]:
# Grades of the first 3 students in the first 2 courses.
d[:3, :2]

array([[76, 71],
       [25, 50],
       [18, 84]])

In [34]:
# Reverse the order of the columns (all rows, columns taken with step -1).
d[:, ::-1]

array([[ 6, 71, 76],
       [20, 50, 25],
       [11, 84, 18],
       [14, 29, 28],
       [87, 68, 50]])

### 变形与转置
- ndarray.reshape()  变形
- ndarray.T/transpose() 矩阵的转置

In [36]:
# A one-dimensional array of 24 random integers in [0, 100).
# Note: this rebinds `d`, which previously held the 5x3 grade table.
d = np.random.randint(low=0, high=100, size=24)
d

array([87, 94, 96, 86, 13,  9,  7, 63, 61, 22, 57,  1,  0, 60, 81,  8, 88,
       13, 47, 72, 30, 71,  3, 70])

In [37]:
# A one-dimensional array of 24 elements has the shape (24,).
d.shape

(24,)

In [38]:
# Reshape into 3 rows and 8 columns.
d.reshape((3, 8))

array([[87, 94, 96, 86, 13,  9,  7, 63],
       [61, 22, 57,  1,  0, 60, 81,  8],
       [88, 13, 47, 72, 30, 71,  3, 70]])

In [39]:
# Reshape into 24 rows and 1 column.
d.reshape((24, 1))

array([[87],
       [94],
       [96],
       [86],
       [13],
       [ 9],
       [ 7],
       [63],
       [61],
       [22],
       [57],
       [ 1],
       [ 0],
       [60],
       [81],
       [ 8],
       [88],
       [13],
       [47],
       [72],
       [30],
       [71],
       [ 3],
       [70]])

In [40]:
# Reshape into 2 columns; -1 lets NumPy work out the number of rows (12 here).
d.reshape((-1, 2))

array([[87, 94],
       [96, 86],
       [13,  9],
       [ 7, 63],
       [61, 22],
       [57,  1],
       [ 0, 60],
       [81,  8],
       [88, 13],
       [47, 72],
       [30, 71],
       [ 3, 70]])

注意：无论如何变形，数组的元素总数 size 都不会改变。

In [42]:
# Keep the 3x8 view of d under its own name for the transpose examples below.
d2 = d.reshape((3, 8))
d2

array([[87, 94, 96, 86, 13,  9,  7, 63],
       [61, 22, 57,  1,  0, 60, 81,  8],
       [88, 13, 47, 72, 30, 71,  3, 70]])

In [43]:
# Transposing swaps rows and columns: the 3x8 array becomes 8x3.
d2.T

array([[87, 61, 88],
       [94, 22, 13],
       [96, 57, 47],
       [86,  1, 72],
       [13,  0, 30],
       [ 9, 60, 71],
       [ 7, 81,  3],
       [63,  8, 70]])

In [44]:
# transpose() with no arguments gives the same result as the .T attribute.
d2.transpose()

array([[87, 61, 88],
       [94, 22, 13],
       [96, 57, 47],
       [86,  1, 72],
       [13,  0, 30],
       [ 9, 60, 71],
       [ 7, 81,  3],
       [63,  8, 70]])

```sql
select a1.year, a1.amount as m1, a2.amount as m2
from A a1 
join A a2 on (a1.year = a2.year)
where a1.month='1' and a2.month='2';

-- if(条件， 为True的结果， 为False的结果)
select year, 
       max(round(if(month='1', amount, 0), 1)) as m1,
       max(round(if(month='2', amount, 0), 1)) as m2
from A
group by year;
```

### ndarray的级联

将多个数组按水平或垂直的方向进行拼接, axis表示级联的方向
- np.concatenate((a1, a2, ...), axis=0)
- np.hstack((a1, a2, ...)) 水平方向
- np.vstack((a1, a2, ...)) 垂直方向

In [48]:
a1 = np.array([[1, 2], [3, 4]])
a2 = np.array([[10, 20], [30, 40]])

# axis=0 joins vertically (0 is the outermost level).
# axis=1 joins horizontally (1 is the second level).
# axis=-1 is the innermost level; for two-dimensional arrays -1 and 1 are the same.
np.concatenate([a1, a2], axis=-1)

array([[ 1,  2, 10, 20],
       [ 3,  4, 30, 40]])

In [49]:
# Horizontal stacking: for these 2-D arrays the result matches np.concatenate(..., axis=-1).
np.hstack((a1, a2))

array([[ 1,  2, 10, 20],
       [ 3,  4, 30, 40]])

In [50]:
# Vertical stacking: the rows of a2 are placed below the rows of a1.
np.vstack((a1, a2))

array([[ 1,  2],
       [ 3,  4],
       [10, 20],
       [30, 40]])

In [51]:
# Generate two grade tables, each holding 2 courses for the same 3 students,
# then join them side by side into one table of 4 courses for 3 students.
s1 = np.random.randint(low=0, high=100, size=(3, 2))
s2 = np.random.randint(low=0, high=100, size=(3, 2))
np.concatenate([s1, s2], axis=-1)

array([[21, 49, 43, 76],
       [57,  3, 26, 52],
       [68, 24, 80, 41]])

In [52]:
# The same side-by-side join, written with hstack.
np.hstack((s1, s2))

array([[21, 49, 43, 76],
       [57,  3, 26, 52],
       [68, 24, 80, 41]])

### ndarray的切割
- np.split(arr, indices_or_sections, axis=0)
- np.hsplit()  等价于axis=1
- np.vsplit()  等价于axis=0

In [53]:
# A 10x6 table of random integers in [0, 100), used for the split examples.
s3 = np.random.randint(low=0, high=100, size=(10, 6))
s3

array([[82, 15, 64, 68, 25, 98],
       [87,  7, 26, 25, 22,  9],
       [67, 23, 27, 37, 57, 83],
       [38,  8, 32, 34, 10, 23],
       [15, 87, 25, 71, 92, 74],
       [62, 46, 32, 88, 23, 55],
       [65, 77,  3,  0, 77,  6],
       [52, 85, 70,  2, 76, 91],
       [21, 75,  7, 77, 72, 75],
       [76, 43, 20, 30, 36,  7]])

In [56]:
np.split(s3, [4], axis=1)  # split just before the 5th column (column index 4)

[array([[82, 15, 64, 68],
        [87,  7, 26, 25],
        [67, 23, 27, 37],
        [38,  8, 32, 34],
        [15, 87, 25, 71],
        [62, 46, 32, 88],
        [65, 77,  3,  0],
        [52, 85, 70,  2],
        [21, 75,  7, 77],
        [76, 43, 20, 30]]), array([[25, 98],
        [22,  9],
        [57, 83],
        [10, 23],
        [92, 74],
        [23, 55],
        [77,  6],
        [76, 91],
        [72, 75],
        [36,  7]])]

In [59]:
# Split s3 along the rows before row indices 5 and 8, giving three pieces.
# NOTE: this rebinds a1 and a2, which held the small 2x2 arrays from the
# concatenation examples.
a1, a2, a3 = np.split(s3, [5, 8], axis=0)
a1

array([[82, 15, 64, 68, 25, 98],
       [87,  7, 26, 25, 22,  9],
       [67, 23, 27, 37, 57, 83],
       [38,  8, 32, 34, 10, 23],
       [15, 87, 25, 71, 92, 74]])

In [60]:
# vsplit with the same indices: row-wise split into pieces of 5, 3 and 2 rows.
np.vsplit(s3, [5, 8])

[array([[82, 15, 64, 68, 25, 98],
        [87,  7, 26, 25, 22,  9],
        [67, 23, 27, 37, 57, 83],
        [38,  8, 32, 34, 10, 23],
        [15, 87, 25, 71, 92, 74]]), array([[62, 46, 32, 88, 23, 55],
        [65, 77,  3,  0, 77,  6],
        [52, 85, 70,  2, 76, 91]]), array([[21, 75,  7, 77, 72, 75],
        [76, 43, 20, 30, 36,  7]])]

In [61]:
# hsplit at column index 4: the first four columns, then the remaining two.
np.hsplit(s3, [4]) 

[array([[82, 15, 64, 68],
        [87,  7, 26, 25],
        [67, 23, 27, 37],
        [38,  8, 32, 34],
        [15, 87, 25, 71],
        [62, 46, 32, 88],
        [65, 77,  3,  0],
        [52, 85, 70,  2],
        [21, 75,  7, 77],
        [76, 43, 20, 30]]), array([[25, 98],
        [22,  9],
        [57, 83],
        [10, 23],
        [92, 74],
        [23, 55],
        [77,  6],
        [76, 91],
        [72, 75],
        [36,  7]])]

### ndarray的copy副本
- ndarray.copy() 属于深度copy：修改副本不会影响原数组

In [63]:
# a1 currently holds the first five rows of s3 (from the np.split cell).
a1

array([[82, 15, 64, 68, 25, 98],
       [87,  7, 26, 25, 22,  9],
       [67, 23, 27, 37, 57, 83],
       [38,  8, 32, 34, 10, 23],
       [15, 87, 25, 71, 92, 74]])

In [64]:
# copy() returns a new array with its own data.
aa1 = a1.copy()

In [65]:
# Writing to the copy does not touch the original:
# aa1[0, 0] becomes 100 while a1[0, 0] keeps its old value.
aa1[0, 0] = 100
display(aa1[0,0], a1[0, 0])

100

82

In [67]:
a1.max(axis=0) # maximum of each column

array([87, 87, 64, 71, 92, 98])

In [68]:
a1.min(axis=1) # minimum of each row

array([15,  7, 23,  8, 15])