## 2.2 NumPy 模块的数组对象

### 2.2.1 创建数组对象

In [None]:
import numpy as np

# Build a one-dimensional array from a Python list, then display the array
# and its shape.
source_values = [10, 20, 30, 40, 50]
arr1 = np.array(source_values)
print(arr1)
print(f"arr1 shape: {arr1.shape}")

In [None]:
# ndmin=2 requests at least two dimensions, so the flat list is returned as
# a 2-D array with a single row.
arr2 = np.array(
    [10, 20, 30, 40, 50],
    ndmin=2,
)
print(arr2)
print(f"arr2 shape: {arr2.shape}")

In [None]:
# Passing dtype=complex stores every element as a complex number.
arr3 = np.array(
    [10, 20, 30, 40, 50],
    dtype=complex,
)
print(arr3)
print(f"arr3 shape: {arr3.shape}")

In [None]:
# A nested list (two rows of three values) converts to a 2-D ndarray.
lists1 = [
    [101, 202, 303],
    [404, 505, 606],
]
arr4 = np.array(lists1)
print(f"arr4 type: {type(arr4)}")
print(f"arr4 shape: {arr4.shape}")

In [None]:
# A 3x4 array filled with zeros.
arr5 = np.zeros(shape=(3, 4))
print(arr5)

In [None]:
# A 3x4 array filled with ones.
arr6 = np.ones(shape=(3, 4))
print(arr6)

In [None]:
# A 4x4 array with ones on the main diagonal and zeros elsewhere.
arr7 = np.eye(N=4)
print(arr7)

In [None]:
# The 4x4 identity matrix.
arr8 = np.identity(n=4)
print(arr8)

In [None]:
# np.empty allocates a 3x4 array without initializing its entries, so the
# printed values are arbitrary.
arr9 = np.empty(shape=(3, 4))
print(arr9)

### 2.2.2 数组对象类型的说明

In [None]:
import numpy as np

# Convert a float array to integer and string dtypes with astype(), then
# report the dtype of each of the three arrays.
arr1 = np.array([12.5, 136.7, 24.6, 35.5, 109.8])
int_arr1, str_arr1 = arr1.astype(int), arr1.astype(str)
dtype_report = f"arr1 dtype: {arr1.dtype}\nint_arr1 dtype: {int_arr1.dtype}\nstr_arr1 dtype: {str_arr1.dtype}"
print(dtype_report)

### 2.2.3 随机生成数组

In [None]:
import numpy as np

# Draw a 4x4 array of samples from a normal distribution, using
# np.random.normal's default mean and standard deviation.
sample_shape = (4, 4)
samples = np.random.normal(size=sample_shape)
print(samples)

In [None]:
from random import normalvariate
import time
import numpy as np

# Compare how long it takes to draw n standard-normal samples with the
# standard library (one Python-level call per value) versus NumPy (a single
# vectorized call).  Each approach is timed on its own: wrapping both in one
# start/end pair reports only their combined time and hides the difference
# this cell is meant to show.
n = 10000000

# perf_counter() is the clock intended for measuring elapsed intervals;
# time.time() follows the wall clock and can jump if that clock is adjusted.
start = time.perf_counter()
samples = [normalvariate(0, 1) for _ in range(n)]
python_seconds = time.perf_counter() - start

start = time.perf_counter()
# Named numpy_samples (not "times"): it holds the drawn samples, not timings.
numpy_samples = np.random.normal(size=n)
numpy_seconds = time.perf_counter() - start

print(f"pure Python normalvariate: {python_seconds} seconds")
print(f"NumPy random.normal: {numpy_seconds} seconds")

## 2.3 NumPy 模块中数组的广播

In [None]:
import numpy as np

# Element-wise arithmetic between two arrays of the same shape.
arr1 = np.array(
    [
        [10, 20, 30],
        [7, 8, 9],
    ]
)
multi_arr = np.multiply(arr1, arr1)
sub_arr = np.subtract(arr1, arr1)
print(f"大小相等的数组实现乘法：\n{multi_arr}")
print(f"大小相等的数组实现减法：\n{sub_arr}")

In [None]:
# Arithmetic between a scalar and an array: the scalar is combined with
# every element.  arr1 is the 2x3 array created in the preceding cell.
divide_arr = np.divide(100, arr1)
multi_arr = np.multiply(arr1, 0.5)
print(f"数组与标量值的除法运算：\n{divide_arr}")
print(f"数组与标量值的乘法运算：\n{multi_arr}")

### 2.3.1 数组广播的原则
列方向上取均值，即沿着 0 轴取均值，结果为 1 行 n 列。

In [None]:
import numpy as np

# Centre every column of a 4x7 table on zero by subtracting the per-column
# mean (the mean taken along axis 0).
weathers = np.array(
    [
        [20, 21, 22, 18, 19, 21, 22],
        [18, 21, 23, 19, 18, 21, 13],
        [18, 19, 22, 21, 21, 17, 16],
        [15, 18, 20, 19, 21, 17, 18],
    ]
)
column_means = weathers.mean(axis=0)
print(f"weather mean: \n{column_means}")
# The length-7 vector of means is broadcast across all four rows.
meaned = weathers - column_means
print(f"meaned: \n{meaned}")
print(f"meaned.mean(0): \n{meaned.mean(axis=0)}")

行方向上取均值，即沿着 1 轴取均值，结果是含 m 个元素的一维数组（形状为 (m,)），需要 reshape 成 m 行 1 列才能与原数组逐行广播。

In [None]:
# Centre every row on zero.  weathers is the 4x7 array defined in the
# preceding cell; the row means are reshaped into a (4, 1) column so that
# they broadcast along each row.
row_means = weathers.mean(axis=1).reshape(4, 1)
print(f"mean(1) reshaped to (4,1): \n{row_means}")
meaned = weathers - row_means
print(f"meaned: \n{meaned}")
print(f"meaned.mean(1): \n{meaned.mean(axis=1)}")

### 2.3.2 数组广播的妙用

In [None]:
import numpy as np

# Broadcasting a scalar three ways: through multiplication, through
# assignment to the whole array, and through assignment to a slice.
grid_shape = (4, 6)
arr = np.ones(grid_shape)
print(f"original arr:\n{arr}")
arr = 7 * arr
print(f"arr * 7:\n{arr}")
arr1 = np.ones(grid_shape)
print(f"original arr1:\n{arr1}")
arr1[...] = 8  # every element becomes 8
print(f"arr1 after broadcasting:\n{arr1}")
arr1[1:3, 2:5] = 9  # only rows 1-2, columns 2-4
print(f"arr1 after slicing:\n{arr1}")

## 2.4 NumPy 模块中数组的操作
### 2.4.1 基本的索引

In [None]:
import numpy as np

# Basic indexing on a 1-D array, then on a 2-D reshaping of the same data.
arr = np.array([3.1, 3.14, 3.141, 3.1415, 3.14159, 3.141592, 3.1415926, 3.14159265])
print(f"original arr: \n{arr}")
print(f"arr[4]: {arr[4]}")
print("-" * 20)
print(f"arr[3:6]: {arr[3:6]}")
print("-" * 20)
print(f"arr: \n{arr}")
print("-" * 20)
# reshape(2, -1) lets NumPy infer the second dimension (8 elements -> 2x4).
arr1 = arr.reshape(2, -1)
# Label corrected from the misspelled "ofiginal".
print(f"original arr1: \n{arr1}")
# Chained indexing and a single tuple index select the same element.
print(f"arr1[0][2]: {arr1[0][2]}")
print(f"arr1[0,2]: {arr1[0,2]}")

### 2.4.2 切片的索引

In [None]:
import numpy as np

# Slice indexing on a 2x2x2 array, followed by assigning a scalar to a slice.
pi_digits = [3.1, 3.14, 3.141, 3.1415, 3.14159, 3.141592, 3.1415926, 3.14159265]
arr = np.array(pi_digits).reshape(2, 2, 2)
print(f"arr: \n{arr}")
print(f"arr.shape: {arr.shape}")
print(f"arr[:1,:1]: {arr[:1,:1]}")
print("-" * 20)
# The scalar is written into row 0 of both outer blocks.
arr[:2, :1] = 3.14159265358
print(f"sliced arr after broadcasting: \n{arr}")

### 2.4.3 布尔型索引

In [None]:
import numpy as np

# Boolean indexing: select salaries with masks built from the job titles
# and from the salaries themselves.
names = np.array(["业务员", "业务员", "经理", "主管", "业务员", "主管"])
salary = np.array([2520.00, 3600.00, 2745.00, 4200.00, 3805.00, 3947.00])
divider = "-" * 20
is_sales = names == "业务员"
print(f"业务员薪水：{salary[is_sales]}")
print(divider)
# Negating the mask selects everyone whose title differs.
print(f"其他人薪水：{salary[~is_sales]}")
print(divider)
is_manager_or_lead = (names == "经理") | (names == "主管")
print(f"经理或主管薪水：{salary[is_manager_or_lead]}")
print(divider)
print(f"大于3500的业务员薪水：{salary[is_sales & (salary > 3500)]}")

### 2.4.4 数组的转置和轴变换

In [None]:
import numpy as np

# Transpose a 5x5 array whose row k (1-based) is filled with the value k,
# once with the transpose() function and once with the .T attribute.
arr = np.array([[row_value] * 5 for row_value in range(1, 6)])
print(f"original arr:\n{arr}")
arr_trans = np.transpose(arr)
print(f"transposed arr:\n{arr_trans}")
arr_trans1 = arr.T
print(f"transposed arr using T:\n{arr_trans1}")

In [None]:
# Transposing a 1-D array leaves it unchanged, so the dot product of arr.T
# with arr is the scalar inner product of the vector with itself.
arr = np.random.randn(4)
print(f"original array:\n{arr}")
print(f"arr.shape:\n{arr.shape}")
transposed = arr.T
print(f"arr.T\n{transposed}")
print(f"arr.T.shape:\n{transposed.shape}")
result = transposed.dot(arr)
print(f"arr.T * arr:\n{result}")

In [None]:
# Swap the first two axes of a (2, 3, 4) array while leaving the last axis
# in place.
arr = np.arange(24).reshape((2, 3, 4))
print(f"original array:\n{arr}")
print(f"arr shape:\n{arr.shape}")
print("(2,3,4) 表示 3 行 4 列 2 层")
print("-" * 20)
trans_arr = np.transpose(arr, axes=(1, 0, 2))
print("(1,0,2) 表示0轴与1轴交换，2轴不变")
print(f"transposed array:\n{trans_arr}")
print(f"trans_arr shape:\n{trans_arr.shape}")
print("(3,2,4) 表示 2 行 4 列 3 层")

### 2.4.5 元素的重复操作： repeat() 和 tile()

In [None]:
import numpy as np

# repeat() on a one-element array, then on a 3x3 identity matrix with and
# without an explicit axis.
arr1 = np.array([3])
print(f"original array:\n{arr1}")
repeat_arr1 = np.repeat(arr1, 3)
print(f"repeated array:\n{repeat_arr1}")
print("-" * 20)
arr2 = np.eye(3)
print(f"original array:\n{arr2}")
# Without an axis the input is flattened before its elements are repeated.
repeat_arr2 = np.repeat(arr2, 3)
print("数组被扁平化：")
print(f"repeated array:\n{repeat_arr2}")
print("指定维度：沿着0轴方向重复，即在行上重复")
# With axis=0 each row is repeated three times.
repeat_arr2 = np.repeat(arr2, 3, axis=0)
print(f"repeated array:\n{repeat_arr2}")

## 2.5 通用方法

In [None]:
import numpy as np

# Universal functions: element-wise sqrt, element-wise maximum of two
# inputs, and a reduction with add.reduce().
arr = np.arange(10)
print(f"original array:\n{arr}")
print(f"after sqrt:\n{np.sqrt(arr)}")
print("-" * 20)
max_arr = np.maximum([5, -3, 9], [1, -9, 18])
# The 3-element list is broadcast against every row of the 3x3 identity.
max_arr1 = np.maximum(np.eye(3), [0.6, -2, 4])
print("maximum() 比较对应索引位置上的元素大小，保留大值")
# The array's own repr already includes brackets; the extra literal pair
# made this 1-D result look 2-D and disagree with the line below.
print(f"maximum of max_arr is:\n{max_arr}")
print(f"maximum of max_arr1 is:\n{max_arr1}")
print("-" * 20)
arr = np.arange(11)
print(f"original array:\n{arr}")
sum_arr = np.add.reduce(arr)
print("add.reduce() 对数组元素进行累加")
print(f"sum of arr is:\n{sum_arr}")

## 2.6 利用数组进行运算
采用 NumPy 中的数组，不用编写循环代码，就可以将许多种数据处理任务表述为简洁的数组表达式。  
用数组表达式代替循环代码的做法，通常被称为矢量化。  
一般来说，矢量化数组运算要比等价的纯 Python 运算快上一两个数量级。
### 2.6.1 用数学方法进行统计

In [None]:
import numpy as np

# Summary statistics over a whole array, along one axis, and cumulatively.
arr1 = np.arange(101)
sum_arr1 = arr1.sum()
mean_arr1 = arr1.mean()
std_arr1 = arr1.std()
print(f"original array:\n{arr1}")
print(f"sum of array:\n{sum_arr1}")
print(f"mean of array:\n{mean_arr1}")
print(f"standard deviation of array:\n{std_arr1}")
print("-" * 20)
# With axis=1 each statistic is computed separately for every row of the
# 4x25 array.
arr2 = np.arange(100)
arr2 = arr2.reshape(4, 25)
sum_arr2 = arr2.sum(axis=1)
mean_arr2 = arr2.mean(axis=1)
std_arr2 = arr2.std(axis=1)
print(f"original array:\n{arr2}")
print(f"sum of array:\n{sum_arr2}")
print(f"mean of array:\n{mean_arr2}")
print(f"standard deviation of array:\n{std_arr2}")
print("-" * 20)
arr3 = np.arange(100)
arr3 = arr3.reshape(4, 25)
# cumsum(0) accumulates down each column.
sum_arr3 = arr3.cumsum(0)
# cumprod(1) accumulates along each row.  It must be called on arr3, the
# 2-D array printed below; the original called it on `arr`, which is not
# defined in this cell.
# NOTE: row 0 starts at 0, so its running product is 0 throughout; the
# products in the other rows exceed the range of a fixed-width integer and
# wrap around without raising an error.
multi_arr = arr3.cumprod(1)
print(f"original array:\n{arr3}")
print(f"cumulative sum of array:\n{sum_arr3}")
print(f"cumulative product of array:\n{multi_arr}")

### 2.6.2 数组中布尔值的统计

In [None]:
import numpy as np

# Counting and summing with a boolean mask, then testing it with any()/all().
arr = np.array([[20, -33, 40, -46, 56], [13, 25, -17, 98, -20]])
print(f"original array:\n{arr}")
arr_bool = arr > 0
print(f"大于零的元素：{arr[arr_bool]}")
# Summing the mask counts its True entries.
sum_arr = arr_bool.sum()
print(f"对大于零的元素计数：{sum_arr}")
# Summing the selected elements adds up the positive values themselves.
sum_arr = arr[arr_bool].sum()
print(f"对大于零的元素求和：{sum_arr}")
print("-" * 20)
any_bool, all_bool = arr_bool.any(), arr_bool.all()
print(f"any_bool={any_bool}, all_bool={all_bool}")

### 2.6.3 将条件逻辑表述为数组运算
这里所说的条件逻辑表达式就是指 np.where() 函数，它是三元表达式 x if condition else y 的矢量化版本。  


In [None]:
import numpy as np

# np.where picks, position by position, the value from red_arr1 where the
# mask is True and the value from red_arr2 where it is False.
red_arr1 = np.array(
    [20, 30, 40, 50, 60, 70, 80, 90],
)
red_arr2 = np.array(
    [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
)
bool_arr = np.array(
    [True, False, True, False, True, False, False, True],
)
red_result = np.where(bool_arr, red_arr1, red_arr2)
print(f"布尔值为True发红包1，否则发红包2:\n{red_result}")

### 2.6.4 数组的合并和拆分

In [None]:
import numpy as np

# Join two 2x3 arrays along axis 0 (stacking rows) and along axis 1
# (placing them side by side).
men = np.array([[20, 21, 23], [25, 26, 27]])
women = np.array([[23, 22, 20], [27, 28, 26]])
print(f"original men's ages:\n{men}")
print(f"men age shape: {men.shape}")
print(f"original women's ages:\n{women}")
print(f"women age shape: {women.shape}")
print("-" * 20)
# Named all_ages rather than `all`: binding `all` at module level replaces
# the builtin all() for every statement that runs afterwards.
all_ages = np.concatenate((men, women), axis=0)
cope = np.concatenate((men, women), axis=1)
print(f"沿0轴方向合并:\n{all_ages}")
print(f"沿1轴方向合并:\n{cope}")

In [None]:
# Split a 1-D array at the given index positions; each index marks where a
# new piece begins.
cope = np.array([20, 21, 23, 23, 22, 20, 25, 26, 27, 27, 28, 26])
print(f"原始数据：{cope}")
cut_points = [1, 3, 6, 7]
split_cope = np.split(cope, cut_points)
print(f"在第1、3、6、7个数据点拆分后的数据：{split_cope}")

### 2.6.5 数组的排序

In [None]:
import numpy as np

# np.sort returns a sorted copy and leaves its input untouched; shown first
# on a 1-D array, then row by row (axis=1) on a 2-D array.
cope = np.array([20, 21, 23, 23, 22, 20, 25, 26, 27, 27, 28, 26])
print(f"原始数组:\n{cope}")
sorted_flat = np.sort(cope)
print(f"排序后的数组:\n{sorted_flat}")
print("-" * 20)
cope = np.array([[20, 23, 21, 22, 24], [28, 25, 27, 26, 29]])
print(f"原始数组:\n{cope}")
sorted_rows = np.sort(cope, axis=1)
print(f"沿着1轴排序后的数组:\n{sorted_rows}")

### 2.6.6 数组的集合运算

In [None]:
import numpy as np

# Set-style operations: remove duplicates, then test each element for
# membership in a second array.
names = np.array(["经理", "副经理", "主管", "主管", "主管", "技术员", "业务员"])
uni_names = np.unique(names)
print(f"去重后的元素:\n{uni_names}")
print("-" * 20)
my_names = np.array(["主管", "技术员"])
# np.isin is the replacement for np.in1d, which NumPy has deprecated.  The
# result gets its own name because it is a boolean mask aligned with
# `names`, not a list of unique names.
contains_mask = np.isin(names, my_names)
print(f"是否包含指定的元素:\n{contains_mask}")

## 2.7 数组文件的输入和输出
### 2.7.1 将数组以二进制的形式保存到文件并读取

In [1]:
import numpy as np

# Generate 20 random values and write them to disk in NumPy's binary format.
# np.save appends ".npy" to a file name that does not already end with it.
arr = np.random.randn(20)
output_name = "arr_file"
np.save(output_name, arr)

In [None]:
# Read the array back from the .npy file and print it together with its
# mean and standard deviation.
arr1 = np.load("arr_file.npy")
mean_value, std_value = arr1.mean(), arr1.std()
print(f"arr1:\n{arr1}")
print(f"arr1 mean: {mean_value}")
print(f"arr1 std: {std_value}")