# 使用前提
安装Anaconda的其他包
* 使用Pycharm进行安装
* 通过Anaconda搜索并安装，以下载tensorflow为例：  
1.搜索：`anaconda search -t conda tensorflow`  
2.选择合适版本，显示一下下载的方式：`anaconda show RMG/tensorflow`  
3.根据提示的命令进行下载：`conda install --channel https://conda.anaconda.org/RMG tensorflow`  

In [2]:
import numpy

# 基本数据操作
## 查看函数帮助
* 参数如何使用，使用格式等
* 当然也可以去官网查文档
* 一般写代码开两个窗口：一个写代码，一个print(help)，做实验看做的对不对

* `numpy.genfromtxt`的帮助文档

In [113]:
# help() prints the documentation itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
help(numpy.genfromtxt)

Help on function genfromtxt in module numpy:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3k.  The strings
        in a 

* `numpy.array`的帮助文档

In [112]:
# help() prints the documentation itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
help(numpy.array)

Help on built-in function array in module numpy:

array(...)
    array(object, dtype=None, copy=True, order='K', subok=False, ndmin=0)
    
    Create an array.
    
    Parameters
    ----------
    object : array_like
        An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    dtype : data-type, optional
        The desired data-type for the array.  If not given, then the type will
        be determined as the minimum type required to hold the objects in the
        sequence.  This argument can only be used to 'upcast' the array.  For
        downcasting, use the .astype(t) method.
    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy will
        only be made if __array__ returns a copy, if obj is a nested sequence,
        or if a copy is needed to satisfy any of the other requirements
        (`dtype`, `order`, etc.).
    order : {'K', 'A', 'C', 'F'}

## 打开数据
* 与python不同的地方：python使用`open()`命令一行一行读入，而numpy是使用`genfromtxt`打开
* 三个参数：文件名，分隔符，读入类型

In [35]:
# skip_header is the number of leading lines to skip. It is 0 here, so the
# header line is kept as row 0 of the array; pass skip_header=1 to drop it.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype=str, skip_header=0)

# Check the type of the data loaded by numpy: numpy.ndarray, a very important
# structure (it prints like a nested list, but it is an ndarray, i.e. a matrix).
print(type(world_alcohol))

# Print the variable
print(world_alcohol)

<class 'numpy.ndarray'>
[['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
 ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]


## 构造数组/矩阵
使用`numpy.array`命令

### 打印shape
[tips] 每天用到最多的东西，看清矩阵的结构：  
* 一般用在神经网络/其他算法，当不清楚中间流程在做什么事情时，可以用`*.shape`来看一下究竟表达什么含义
* debug时会用到：将其中能打印shape的东西都打印出来，看哪个shape有问题，说不定就能找到错误

### 构造向量

In [20]:
# Build a 1-D array (vector) from a Python list.
vector = numpy.array([5, 10, 15, 20])
print(vector)
# Show the container type; the recorded output includes this line.
print(type(vector))
# A 1-D array with 4 elements has shape (4,).
print(vector.shape)

[ 5 10 15 20]
<class 'numpy.ndarray'>
(4,)


### 构造矩阵

In [17]:
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print(matrix)
print(matrix.shape)

[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
(3, 3)


## 查看ndarray数据的类型
* 使用`numpy.array`构造的数据类型只能为相同类型，不能像list中那样，既有int又有string，或者既有int又有float，最终会被统一化，用`*.dtype`来查看类型
* 优先级：string > float > int

In [27]:
numbers = numpy.array([1, 2, 3, 4])
print(numbers)
print(numbers.dtype)

numbers = numpy.array([1, 2, 3, 4.0])
print(numbers)
print(numbers.dtype)

numbers = numpy.array([1, 2, 3, '4'])
print(numbers)
print(numbers.dtype)

[1 2 3 4]
int64
[1. 2. 3. 4.]
float64
['1' '2' '3' '4']
<U21


## 选取数据
使用索引取出指定数据

In [41]:
# Select values from the world_alcohol array loaded earlier
print(world_alcohol)

# Row 0 is the header, so index 1 is the first data record; take its last column
display_value = world_alcohol[1,-1]
print(display_value)

# Country (column index 2) of the second data record (row index 2)
second_country = world_alcohol[2,2]
print(second_country)

[['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
 ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
0
Uruguay


## 数据切片
### 向量中取某些数（参数前闭后开）

In [42]:
vector = numpy.array([5, 10, 15, 20])
print(vector[0:3])

[ 5 10 15]


### 矩阵中取一行/一列数

In [71]:
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45]
])

#取第一行数
print(matrix[:1,:])
print(matrix[0,:])
print(matrix[0,])

print('\n')
# 取前两行数
print(matrix[:2])

print('\n')
#取第二、三行数
print(matrix[1:])

print('\n')
# 取第二列数
print(matrix[:,1:2])

print('\n')
# 取前二列数
print(matrix[:,:2])

[[ 5 10 15]]
[ 5 10 15]
[ 5 10 15]


[[ 5 10 15]
 [20 25 30]]


[[20 25 30]
 [35 40 45]]


[[10]
 [25]
 [40]]


[[ 5 10]
 [20 25]
 [35 40]]


## 数据检查&判断
### 查看某个数是否在ndarray中

In [81]:
vector = numpy.array([5, 10, 15, 20])
vector == 10 # 判断是否有10存在

array([False,  True, False, False])

In [82]:
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45]
])
matrix == 25

array([[False, False, False],
       [False,  True, False],
       [False, False, False]])

### 将上面的值当成索引
用途：把上面得到的布尔数组当作索引，可以取出满足条件的元素，或该元素所在的整行/整列数据

In [87]:
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector==10)
print(equal_to_ten)
print(vector[equal_to_ten])

[False  True False False]
[10]


In [90]:
# 查看第二列是否有哪行元素=25
second_column_25 = (matrix[:,1] == 25)

# 查看索引，知道第二行有一个数=25
print(second_column_25)

# 取出含有25的这行数
print(matrix[second_column_25,:])

[False  True False]
[[20 25 30]]


## 逻辑判断
### 与逻辑

In [210]:
# e.g.让一个数值既等于10又等于5（逻辑判断肯定不存在的）
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)

[False False False False]


### 或逻辑

In [95]:
# 找出向量中数值等于10或5的向量
equal_to_ten_or_five = (vector == 10) | (vector == 5)
print(equal_to_ten_or_five)

[ True  True False False]


### 逻辑应用
* 做索引更改数值

In [97]:
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
vector[equal_to_ten_or_five] = 50
print(vector)

[50 50 15 20]


## 元素dtype类型整体转换
使用`*.astype(float)`操作

In [103]:
# 原来dtype为字符串的类型
vector = numpy.array(["1","2","3"])
print(vector.dtype)
print(vector)

print('\n')
# 更改成为dtype为float/int的类型
vector = vector.astype(float)
print(vector.dtype)
print(vector)

<U1
['1' '2' '3']


float64
[1. 2. 3.]


## 基本统计操作

### 求极值min、max

In [105]:
vector = numpy.array([5, 10, 15, 20])
print(vector.min())
print(vector.max())

5
20


### 求和

In [111]:
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45]
])
# 按列求和
print(matrix.sum(axis=0))

# 按行求和
print(matrix.sum(axis=1))

[60 75 90]
[ 30  75 120]


# numpy函数操作

In [115]:
import numpy as np

## 矩阵变换

### 构造向量元素

In [138]:
vector = np.arange(15)

# 以下是后续补充操作，可以先往下看
print(vector)
print(vector.shape)
print(vector.ndim)
print(vector.dtype)
print(vector.size)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
(15,)
1
int64
15


### 将上述向量变形reshape

In [218]:
a = np.arange(15).reshape(3, 5)
print(a)

print('\n')
# 省略参数的写法(15个元素，行为3，列肯定为5，所以5就不用写了)
a = np.arange(15).reshape(3, -1)
print(a)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


### 变回原型ravel
reshape的逆操作

In [203]:
print(a.ravel())

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


### 矩阵属性

#### 矩阵的形状

##### 1.查看矩阵的形状

In [204]:
print(a.shape)

(3, 5)


##### 2.更改矩阵的形状

In [209]:
a.shape = (5,3)
print(a)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]


#### 矩阵的维度

In [125]:
print(a.ndim)

2


#### 矩阵元素的类型

In [135]:
print(a.dtype.name)
print(a.dtype)

int64
int64


#### 矩阵的大小（有多少个元素）

In [136]:
print(a.size)

15


## 初始化向量/矩阵

### 初始化0矩阵（元素默认为float）

In [145]:
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

### 初始化1矩阵

In [140]:
# int型
np.ones((2, 3, 4), dtype=np.int32)

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int32)

In [141]:
# String dtype: use the builtin str. The np.str alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
np.ones((2, 3, 4), dtype=str)

array([[['1', '1', '1', '1'],
        ['1', '1', '1', '1'],
        ['1', '1', '1', '1']],

       [['1', '1', '1', '1'],
        ['1', '1', '1', '1'],
        ['1', '1', '1', '1']]], dtype='<U1')

### 初始化一个序列
#### 规定两端点间隔
`np.arange(a, b, step)`，step为相邻两点之间的间隔（区间前闭后开，不包含b）

In [168]:
print(np.arange(10, 30, 5))
print(np.arange(0, 2, 0.3))

[10 15 20 25]
[0.  0.3 0.6 0.9 1.2 1.5 1.8]


In [167]:
print(np.arange(10))
print(np.arange(12).reshape(4,3))

[0 1 2 3 4 5 6 7 8 9]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


#### 规定区间内有多少个端点，从而均匀划分区间
`np.linspace(a, b, points)`，points为在区间[a, b]上均匀取点的个数（默认包含两个端点）

In [174]:
from numpy import pi
two_pi_100 = np.linspace(0, 2*pi, 100)
print(two_pi_100)

print('\n')
# 将100个点投到作为sin(x)的输入
print(np.sin(two_pi_100))

[0.         0.06346652 0.12693304 0.19039955 0.25386607 0.31733259
 0.38079911 0.44426563 0.50773215 0.57119866 0.63466518 0.6981317
 0.76159822 0.82506474 0.88853126 0.95199777 1.01546429 1.07893081
 1.14239733 1.20586385 1.26933037 1.33279688 1.3962634  1.45972992
 1.52319644 1.58666296 1.65012947 1.71359599 1.77706251 1.84052903
 1.90399555 1.96746207 2.03092858 2.0943951  2.15786162 2.22132814
 2.28479466 2.34826118 2.41172769 2.47519421 2.53866073 2.60212725
 2.66559377 2.72906028 2.7925268  2.85599332 2.91945984 2.98292636
 3.04639288 3.10985939 3.17332591 3.23679243 3.30025895 3.36372547
 3.42719199 3.4906585  3.55412502 3.61759154 3.68105806 3.74452458
 3.8079911  3.87145761 3.93492413 3.99839065 4.06185717 4.12532369
 4.1887902  4.25225672 4.31572324 4.37918976 4.44265628 4.5061228
 4.56958931 4.63305583 4.69652235 4.75998887 4.82345539 4.88692191
 4.95038842 5.01385494 5.07732146 5.14078798 5.2042545  5.26772102
 5.33118753 5.39465405 5.45812057 5.52158709 5.58505361 5.648520

### 初始化随机向量/矩阵
numpy下的random模块，调用模块下的random函数

In [162]:
print(np.random.random(2))
print(np.random.random((2,3)))

[0.42722956 0.04693816]
[[0.61621283 0.87527833 0.90620097]
 [0.09875753 0.02143988 0.89576603]]


In [166]:
# To explore the random module, read its built-in help. help() prints the text
# itself and returns None, so it is not wrapped in print().
help(np.random)

Help on package numpy.random in numpy:

NAME
    numpy.random

DESCRIPTION
    Random Number Generation
    
    Utility functions
    random_sample        Uniformly distributed floats over ``[0, 1)``.
    random               Alias for `random_sample`.
    bytes                Uniformly distributed random bytes.
    random_integers      Uniformly distributed integers in a given range.
    permutation          Randomly permute a sequence / generate a random sequence.
    shuffle              Randomly permute a sequence in place.
    seed                 Seed the random number generator.
    choice               Random sample from 1-D array.
    
    
    Compatibility functions
    rand                 Uniformly distributed values.
    randn                Normally distributed values.
    ranf                 Uniformly distributed floating point numbers.
    randint              Uniformly distributed integers in a given range.
    
    Univariate distributions
    beta                 

## 矩阵的运算

### 矩阵的基本运算

In [None]:
a = np.array([20, 30, 40, 50])
b = np.arange(4)
print(a)
print(b)

#### 向量减数值

In [179]:
c = a - 1
print(c)

[19 29 39 49]


#### 向量减向量（同大小）

In [183]:
c = a - b
print(c)

[20 29 38 47]


#### 向量元素值平方

In [184]:
print(b**2)

[0 1 4 9]


#### 向量元素值判断

In [185]:
print(a < 35)

[ True  True False False]


#### 矩阵e次幂/开方

In [196]:
B = np.arange(3)
print(B)
print(np.exp(B))
print(np.sqrt(B))

[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]


### 矩阵乘法

In [186]:
A = np.array([
    [1,1],
    [0,1]])

B = np.array([
    [2,0],
    [3,4]])

print(A)
print('\n')
print(B)

[[1 1]
 [0 1]]


[[2 0]
 [3 4]]


#### 矩阵星乘
对应位置元素相乘

In [187]:
print(A*B)

[[2 0]
 [0 4]]


#### 矩阵点乘
线性代数中的矩阵乘法：结果的第i行第j列 = A的第i行与B的第j列对应元素相乘再求和

In [192]:
print(A.dot(B))
print(np.dot(A,B))

[[5 4]
 [3 4]]
[[5 4]
 [3 4]]


### 取整操作
#### 向下取整（`np.floor`，注意不同于地板除 `//`）

In [212]:
a = np.floor(10*np.random.random((3,4)))
print(a)

[[6. 7. 9. 3.]
 [9. 3. 2. 5.]
 [6. 2. 8. 9.]]


### 矩阵转置

In [213]:
print(a.T)

[[6. 9. 6.]
 [7. 3. 2.]
 [9. 2. 8.]
 [3. 5. 9.]]


### 矩阵拼接和切分

#### 矩阵拼接

In [219]:
a = np.floor(10*np.random.random((2,2)))
b = np.floor(10*np.random.random((2,2)))
print(a)
print(b)

[[4. 7.]
 [6. 9.]]
[[9. 0.]
 [7. 5.]]


##### 1.横向拼接（增加特征）

In [220]:
print(np.hstack((a,b)))

[[4. 7. 9. 0.]
 [6. 9. 7. 5.]]


##### 2.纵向拼接（增加样本）

In [221]:
print(np.vstack((a,b)))

[[4. 7.]
 [6. 9.]
 [9. 0.]
 [7. 5.]]


#### 矩阵切分

In [240]:
a = np.floor(10*np.random.random((2,12)))
print(a)

[[3. 1. 6. 3. 6. 7. 6. 0. 4. 6. 6. 1.]
 [2. 1. 7. 7. 9. 3. 6. 4. 3. 5. 9. 2.]]


##### 1.横向切分

In [233]:
# 均分成三段
print(np.hsplit(a,3))

[array([[8., 3., 0., 3.],
       [5., 5., 0., 0.]]), array([[3., 1., 2., 5.],
       [7., 1., 4., 4.]]), array([[5., 4., 2., 7.],
       [3., 1., 7., 7.]])]


In [237]:
# Split before column indices 3 and 5 (0-based), giving three pieces:
# columns [0:3], [3:5] and [5:]
print(np.hsplit(a,(3,5)))

[array([[8., 3., 0.],
       [5., 5., 0.]]), array([[3., 3.],
       [0., 7.]]), array([[1., 2., 5., 5., 4., 2., 7.],
       [1., 4., 4., 3., 1., 7., 7.]])]


##### 2.纵向切分

In [245]:
a = a.T
print(a)

print('\n')
print(np.vsplit(a,3))

[[3. 2.]
 [1. 1.]
 [6. 7.]
 [3. 7.]
 [6. 9.]
 [7. 3.]
 [6. 6.]
 [0. 4.]
 [4. 3.]
 [6. 5.]
 [6. 9.]
 [1. 2.]]


[array([[3., 2.],
       [1., 1.],
       [6., 7.],
       [3., 7.]]), array([[6., 9.],
       [7., 3.],
       [6., 6.],
       [0., 4.]]), array([[4., 3.],
       [6., 5.],
       [6., 9.],
       [1., 2.]])]


### 矩阵复制

#### 直接复制（两个变量指向同一片内存区域）

In [257]:
a = np.arange(12)
print(a.shape)

b = a
print(b is a)

# 形状，数值相同
b.shape = (3,4)
print(a.shape)

# 内存相同
print(id(a))
print(id(b))

(12,)
True
(3, 4)
4768328320
4768328320


#### 浅复制
`a.view()`  
注意：
* 浅复制变量c和原变量a是两个不同的对象（`id`不同），改变了c的形状也不会改变a的形状
* 但是，改变了c的数值，也会改变a的数值（两者虽然是不同的对象，但共用同一份底层数据）

In [263]:
c = a.view()
print(c is a)

print('\n')
# 形状不同
c.shape = (2,6)
print(c)
print('----')
print(a.shape)

print('\n')
# 数值相同
c[0,4] = 1234
print(c)
print('----')
print(a)

print('\n')
# 内存不同
print(id(a))
print(id(c))

False


[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
----
(3, 4)


[[   0    1    2    3 1234    5]
 [   6    7    8    9   10   11]]
----
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]


4768328320
4767532816


#### 深复制
`d = a.copy()`  
相当于用a的数值和形状为d做了初始化，但是两者没有必然联系

In [265]:
d = a.copy()
print(d is a)

d[0, 0] = 9999

print(d)
print(a)

False
[[9999    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]


### 排序和索引

#### 索引

`range()`是惰性序列对象：直接print只会显示`range(0, n)`而不是其中的元素（可用`list(range(n))`查看）；它既可用于for循环，也可以像下面这样直接作为索引使用

In [277]:
data = np.sin(np.arange(20)).reshape(5,4)
print(data)

print('\n')
# For each column (axis=0), find the row index of its maximum value
# (this is an argmax, not a sort)
ind = data.argmax(axis=0)
print(ind)

# Number of rows and columns of the matrix
# print(data.shape[0]) # number of rows
# print(data.shape[1]) # number of columns

print('\n')
# Pick the maximum of each column: pair row index ind[j] with column index j
data_max = data[ind, range(data.shape[1])]

print(data_max)


[[ 0.          0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
 [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
 [-0.53657292  0.42016704  0.99060736  0.65028784]
 [-0.28790332 -0.96139749 -0.75098725  0.14987721]]


[2 0 3 1]


[0.98935825 0.84147098 0.99060736 0.6569866 ]


#### 扩充
将给定的数据平铺扩充：`np.tile(a, (2, 3))`把a在行方向重复2次、列方向重复3次，结果形状为(2, 12)，而不是2*3的矩阵

In [281]:
a = np.arange(0, 40, 10)
print(a)

print('\n')
b = np.tile(a,(2,3))
print(b)

[ 0 10 20 30]


[[ 0 10 20 30  0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30  0 10 20 30]]


#### 排序
##### 1.直接进行排序

In [297]:
a = np.array([
    [4,3,5],
    [1,2,1]
])
print(a)

# 后续使用argsort进行索引
q = np.argsort(a)
print(q)

[[4 3 5]
 [1 2 1]]
[[1 0 2]
 [0 2 1]]


In [296]:
# Sort each row in ascending order; np.sort returns a sorted copy in b
b = np.sort(a, axis=1)
print(b)

print('\n')
# Alternative: the ndarray.sort method sorts a itself, in place
a.sort(axis=1)
print(a)
# Note: a.sort(axis=1) returns None and modifies a in place, so
# print(a.sort(axis=1)) would print None; sort first, then print(a).

[[3 4 5]
 [1 1 2]]


[[3 4 5]
 [1 1 2]]


##### 2.获取排序的索引，手动排序

In [293]:
# 获取从小到大排序的索引
a = np.array([4,3,1,2])
j = np.argsort(a)
print(j)
print(a[j])

[2 3 1 0]
[1 2 3 4]
