In [None]:
# numpy 的主要作用就是多维矩阵的计算。
# 所以它在数据处理方面非常有用。

import numpy as np


In [None]:
# Build a 3x5 two-dimensional array from the integers 0..14.
a = np.arange(15).reshape((3, 5))
print(a)

# Shape: the number of elements along each axis.
print(a.shape)

# Number of dimensions (axes).
print(a.ndim)

# Name of the element data type.
print(a.dtype.name)

# Size of a single element.
print(a.itemsize)

# Total number of elements.
print(a.size)

# Python type of the array object.
print(type(a))

# An array can also be built directly from a Python sequence.
b = np.array((6, 7, 8))
print(b)
# Python type of the array object.
print(type(b))

# The element type can be chosen explicitly at creation time.
c = np.array([(1, 2), (3, 4)], dtype=complex)
print(c)


In [None]:
# Usually the shape of an array is known up front but its contents are not,
# so NumPy can create it pre-filled with placeholder values.
print(np.zeros(shape=(3, 4)))
print(np.ones(shape=(2, 3, 4), dtype=np.int16))
# np.empty does not initialize the entries, so the printed values are arbitrary.
print(np.empty(shape=(2, 3)))


In [None]:
# linspace takes the number of samples to return rather than a step size.
from numpy import pi
print(np.linspace(0, 2, num=9))  # 9 numbers from 0 to 2

# Useful for evaluating a function at lots of points.
x = np.linspace(0, 2 * pi, num=100)
print(x)
f = np.sin(x)
print(f)


In [None]:
# 注意学习下面的这些函数
# array, zeros, zeros_like, ones, ones_like, empty, empty_like, arange,
# linspace, numpy.random.rand, numpy.random.randn, fromfunction, fromfile

In [None]:
# Arithmetic on arrays.
# Arithmetic and comparison operators between arrays act element by element.
a = np.array([20, 30, 40, 50])

b = np.arange(4)
print(b)

c = a - b
print(c)

print(b ** 2)

print(np.sin(a) * 10)

print(a < 35)


In [None]:
# The * operator multiplies element by element; the matrix product is
# written with @ or dot.
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])
print(A * B)  # elementwise product

print(A @ B)  # matrix product

print(np.dot(A, B))  # another matrix product


In [None]:
# Upcasting: when the operands have different element types, the result
# takes the more general of the two.
a = np.ones(3, dtype=np.int32)
b = np.linspace(0, np.pi, num=3)
print(b.dtype.name)

c = a + b
print(c)

print(c.dtype.name)


In [None]:
# Reductions such as sum, min and max run over every element by default.
a = np.random.random(size=(2, 3))
print(a)

print(a.sum())

print(a.min())

print(a.max())


In [None]:
# Passing axis restricts sum, min, etc. to a single dimension.
b = np.arange(12).reshape((3, 4))
print(b)

print(b.sum(axis=0))  # sum of each column

print(b.min(axis=1))  # min of each row

print(b.cumsum(axis=1))  # cumulative sum along each row


In [None]:
# NumPy also provides functions that operate element by element, such as
# the exponential and the square root.
B = np.arange(3)
print(B)

print(np.exp(B))

print(np.sqrt(B))

C = np.array([2.0, -1.0, 4.0])
print(np.add(B, C))


In [None]:
# 可以参考下面的函数
# all, any, apply_along_axis, argmax, argmin, argsort, average, bincount,
# ceil, clip, conj, corrcoef, cov, cross, cumprod, cumsum, diff, dot, floor,
# inner, inv, lexsort, max, maximum, mean, median, min, minimum, nonzero,
# outer, prod, re, round, sort, std, sum, trace, transpose, var, vdot, vectorize, where

In [None]:
# Reading and assigning elements of a one-dimensional array.
a = np.arange(10)**3
print(a)

print(a[2])

print(a[2:5])

# equivalent to a[0:6:2] = -1000; from start to position 6, exclusive,
# set every 2nd element to -1000
a[:6:2] = -1000
print(a)

print(a[::-1])  # reversed a

# Iterate over the elements and print each cube root. The fractional power
# i**(1/3.) evaluates to nan for the negative entries assigned above, so
# np.cbrt, which is defined for negative reals, is used instead.
for i in a:
    print(np.cbrt(i))

In [None]:
def f(x, y):
    """Return 10*x + y for the given x and y."""
    return x * 10 + y

# Build a 5x4 integer array whose entries are produced by f.
b = np.fromfunction(f, shape=(5, 4), dtype=int)
print(b)

print(b[2, 3])

print(b[0:5, 1])  # each row in the second column of b

print(b[:, 1])  # equivalent to the previous example

print(b[1:3, :])  # each column in the second and third row of b

print(b[-1])  # the last row. Equivalent to b[-1,:]

In [None]:
# An Ellipsis (...) can stand in for the remaining indices.
# c is a 3D array (two stacked 2D arrays).
c = np.array([
    [[0, 1, 2], [10, 12, 13]],
    [[100, 101, 102], [110, 112, 113]],
])
print(c.shape)

print(c[1, ...])  # same as c[1,:,:] or c[1]

print(c[..., 2])  # same as c[:,:,2]

In [None]:
# Ways to enumerate the elements of an array.
# NOTE(review): b is whatever the preceding cell left in scope (a 2D array
# there), so this cell must be run after it.
# Iterating over the array itself yields one item per step along its first axis.
for row in b:
    print(row)

# The .flat attribute iterates over every individual element instead.
for element in b.flat:
    print(element)

# See also: Indexing, Indexing (reference), newaxis, ndenumerate, indices


In [None]:
# Changing the shape of an array.
a = np.floor(np.random.random((3, 4)) * 10)

print(a)

print(a.shape)

print(a.ravel())  # returns the array, flattened

print(a.reshape((6, 2)))  # returns the array with a modified shape

print(a.T)  # returns the array, transposed

print(a.T.shape)

print(a.shape)


In [None]:
# A dimension given as -1 is computed automatically.
print(a.reshape((3, -1)))

# reshape only returns a reshaped array, whereas resize modifies the
# original array itself.
print(a)
a.resize(2, 6)
print(a)


In [None]:
# Arrays can be stacked together.
a = np.floor(np.random.random((2, 2)) * 10)
print(a)

b = np.floor(np.random.random((2, 2)) * 10)
print(b)

# Stack vertically.
print(np.vstack([a, b]))

# Stack horizontally.
print(np.hstack([a, b]))


In [None]:
# newaxis adds a dimension to an array.
# NOTE(review): the first column_stack call uses the a and b left in scope
# by the preceding cell, so this cell must be run after it.
from numpy import newaxis
print(np.column_stack((a,b)))     # with 2D arrays

a = np.array([4.,2.])
b = np.array([3.,8.])
print(np.column_stack((a,b)))     # returns a 2D array

print(np.hstack((a,b)))           # the result is different

print(a[:,newaxis])

# this allows to have a 2D columns vector
print(np.column_stack((a[:,newaxis],b[:,newaxis])))

# the result is the same
print(np.hstack((a[:,newaxis],b[:,newaxis])))



In [None]:
# A large array can be split into smaller ones.
a = np.floor(np.random.random((2, 12)) * 10)
print(a)

print(np.hsplit(a, 3))  # Split a into 3

# Split a after the third and the fourth column
print(np.hsplit(a, [3, 4]))


Copies and Views

In [None]:
# Plain assignment makes no copy: both names refer to the same object.
a = np.arange(12)
b = a  # no new object is created
print(b is a)  # a and b are two names for the same ndarray object

b.shape = (3, 4)  # changes the shape of a
print(a.shape)

def f(x):
    """Print the id() of the object passed in."""
    print(id(x))

print(id(a))  # id is a unique identifier of an object

f(a)


In [None]:
# view() makes a shallow copy: a new array object that looks at the same data.
c = a.view()
print(c is a)

print(c.base is a)  # c is a view of the data owned by a

print(c.flags.owndata)

c.shape = (2, 6)  # a's shape doesn't change
print(a.shape)

c[0, 4] = 1234  # a's data changes
print(c)
print(a)

# Take a slice of a (columns 1 and 2 of every row).
s = a[:, 1:3]

# s[:] is a view of s. Note the difference between s=10 and s[:]=10
s[:] = 10

print(a)


Fancy indexing and index tricks

In [None]:
# An array can be used as the index of another array.
a = np.arange(12) ** 2  # the first 12 square numbers
i = np.array([1, 1, 3, 8, 5])  # an array of indices
print(a[i])  # the elements of a at the positions i

# The shape of the index array is preserved in the result.
j = np.array([[3, 4], [9, 7]])  # a bidimensional array of indices
print(a[j])  # the same shape as j

In [None]:
# When the index array is multidimensional, it still applies only to the
# first dimension of the indexed array.
palette = np.array([
    [0, 0, 0],        # black
    [255, 0, 0],      # red
    [0, 255, 0],      # green
    [0, 0, 255],      # blue
    [255, 255, 255],  # white
])
# each value corresponds to a color in the palette
image = np.array([
    [0, 1, 2, 0],
    [0, 3, 4, 0],
])
print(palette[image])  # the (2,4,3) color image


In [None]:
# Indices can be given for more than one dimension.
a = np.arange(12).reshape(3,4)
print(a)

# indices for the first dim of a
i = np.array( [ [0,1],
                [1,2] ] )

# indices for the second dim
j = np.array( [ [2,1],
                [3,3] ] )

print(a[i,j])   # i and j must have equal shape

print(a[i,2])

print(a[:,j])        # i.e., a[ : , j]

# i and j can be bundled into one sequence, but the index itself must be a
# tuple: NumPy interprets a list as a single index array over the first
# dimension, which here raises an IndexError instead of meaning a[i,j].
l = [i,j]
print(a[tuple(l)])         # equivalent to a[i,j]


In [None]:
# Index arrays can also be used as assignment targets.
a = np.arange(5)
print(a)

a[[1, 3, 4]] = 0
print(a)

# When an index is repeated, the last assigned value wins.
a = np.arange(5)
a[[0, 0, 2]] = (1, 2, 3)
print(a)


In [None]:
# Boolean arrays can be used as indices.
a = np.arange(12).reshape((3, 4))
b = a > 4
print(b)  # b is a boolean with a's shape

print(a[b])  # 1d array with the selected elements

print(a[a > 4])  # it is same

# A convenient use of this is assignment.
a[b] = 0
print(a)

In [None]:
import matplotlib.pyplot as plt
def mandelbrot(h, w, maxit=20, r=2):
    """Return an escape-time image of the Mandelbrot fractal of size (h, w).

    Parameters
    ----------
    h : int
        Number of sample points along the imaginary axis, from -1.4 to 1.4.
    w : int
        Number of sample points along the real axis, from -2 to 0.8.
    maxit : int, optional
        Number of iterations of z -> z**2 + c to run.
    r : float, optional
        Escape radius: a point counts as diverging once abs(z) exceeds r.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (h, w). Each entry is the iteration index at
        which that point first exceeded the escape radius, or maxit if it
        never did.
    """
    y, x = np.ogrid[-1.4:1.4:h*1j, -2:0.8:w*1j]
    c = x + y*1j
    z = c
    divtime = maxit + np.zeros(z.shape, dtype=int)

    for i in range(maxit):
        z = z**2 + c
        # Compare the real-valued magnitude rather than ordering complex numbers.
        diverge = np.abs(z) > r                 # who is diverging
        div_now = diverge & (divtime == maxit)  # who is diverging now
        divtime[div_now] = i                    # note when
        z[diverge] = r                          # avoid diverging too much

    return divtime
# Draw the 400x400 image returned by mandelbrot.
plt.imshow(mandelbrot(h=400, w=400))
# plt.show()

In [None]:
# One-dimensional boolean arrays can select along individual axes.
a = np.arange(12).reshape((3, 4))
b1 = np.array([False, True, True])  # first dim selection
b2 = np.array([True, False, True, False])  # second dim selection

print(a[b1, :])  # selecting rows

print(a[b1])  # same thing

print(a[:, b2])  # selecting columns

print(a[b1, b2])  # a weird thing to do


In [None]:
# The ix_ function combines several vectors so that an expression can be
# evaluated for every n-tuple drawn from them. Here a+b*c is computed for
# all the triplets taken from the vectors a, b and c.

a = np.array([2, 3, 4, 5])
b = np.array([8, 5, 4])
c = np.array([5, 4, 6, 8, 3])
ax, bx, cx = np.ix_(a, b, c)
print(ax)

print(bx)

print(cx)

print(ax.shape, bx.shape, cx.shape)

result = bx * cx + ax
print(result)


# The entry at [3,2,4] matches the value computed directly from the vectors.
print(result[3, 2, 4])

print(b[2] * c[4] + a[3])


In [None]:
# Basic linear algebra operations.
a = np.array([[1.0, 2.0], [3.0, 4.0]])
print(a)

print(a.T)

print(np.linalg.inv(a))

u = np.eye(2)  # unit 2x2 matrix; "eye" represents "I"
print(u)


j = np.array([[0.0, -1.0], [1.0, 0.0]])

print(np.matmul(j, j))  # matrix product

print(u.trace())  # trace

y = np.array([[5.0], [7.0]])
print(np.linalg.solve(a, y))

print(np.linalg.eig(j))

In [None]:
# When the shape of an array is changed, the array is reshaped automatically.
a = np.arange(30)
a.shape = (2, -1, 3)  # -1 means "whatever is needed"
print(a.shape)

print(a)

In [None]:
import matplotlib.pyplot as plt
# Build a vector of 10000 normal deviates with variance 0.5^2 and mean 2
mu, sigma = 2, 0.5
v = np.random.normal(loc=mu, scale=sigma, size=10000)
# Plot a normalized histogram with 50 bins
plt.hist(v, bins=50, density=True)  # matplotlib version (plot)
# plt.show()

In [None]:
# Compute the histogram with numpy and then plot it
n, bins = np.histogram(v, bins=50, density=True)  # NumPy version (no plot)
# Plot each count against the midpoint of its bin.
bin_centers = 0.5 * (bins[1:] + bins[:-1])
plt.plot(bin_centers, n)
# plt.show()