# Numpy Tutorial
## 参考
- 《Python for Data Analysis》

## 简介
NumPy（Numerical Python）是Python科学计算中的基础库。
具有以下优点：
- **ndarray** 提供了快速高效的向量化操作
- 常用的标准操作
- 读写工具
- 线性代数与随机函数
- 方便与其他语言对接


## 创建ndarrays
### array Function
array(...)
    array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)

In [5]:
import numpy as np
ar=range(5)
print ar
print type(ar)
ndar1=np.array(ar)
print ndar1
print type(ndar1)

[0, 1, 2, 3, 4]
<type 'list'>
[0 1 2 3 4]
<type 'numpy.ndarray'>


### random

In [6]:
ndar2=np.random.random((2,3))
print ndar2
print type(ndar2)

[[ 0.80598768  0.00407741  0.28500418]
 [ 0.70382016  0.21241062  0.6374319 ]]
<type 'numpy.ndarray'>


### zeros

In [7]:
zar=np.zeros(5)
print zar

[ 0.  0.  0.  0.  0.]


In [8]:
zar2=np.zeros((2,3))
print zar2

[[ 0.  0.  0.]
 [ 0.  0.  0.]]


### ones/ones_like

In [9]:
oar=np.ones(5)
print oar

[ 1.  1.  1.  1.  1.]


In [11]:
oar2=np.ones((2,3))
print oar2

[[ 1.  1.  1.]
 [ 1.  1.  1.]]


In [13]:
oar3=np.ones_like(zar2)
print oar3

[[ 1.  1.  1.]
 [ 1.  1.  1.]]


### eye/identity
eye(N, M=None, k=0, dtype=<type 'float'>)
    Return a 2-D array with ones on the diagonal and zeros elsewhere.
    
    Parameters
    ----------
    N : int
      Number of rows in the output.
    M : int, optional
      Number of columns in the output. If None, defaults to `N`.
    k : int, optional
      Index of the diagonal: 0 (the default) refers to the main diagonal,
      a positive value refers to an upper diagonal, and a negative value
      to a lower diagonal.

In [18]:
ear=np.eye(5)
print ear

[[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


In [20]:
ear2=np.eye(4,M=3)
print ear2

[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  0.  0.]]


In [21]:
ear3=np.eye(5,M=3,k=-1)
print ear3

[[ 0.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  0.  0.]]


### arange

In [30]:
aar=np.arange(8)
print aar

[0 1 2 3 4 5 6 7]


In [34]:
aar2=np.arange(start=1,stop=19,step=3)
print aar2

[ 1  4  7 10 13 16]


### data type for ndarray

In [23]:
arr1=np.array([1,2,3,4])
print arr1.dtype

int32


In [24]:
arr2=np.array([1,2,3,4],dtype=np.float64)
print arr2.dtype

float64


In [25]:
arr3=arr2.astype(np.int64)
print arr3
print arr3.dtype

[1 2 3 4]
int64


In [28]:
arr4=arr3.astype(np.string_)
print arr4
print arr4.dtype

['1' '2' '3' '4']
|S21


### 矩阵与标量基本操作

In [36]:
arr=np.arange(1,7).reshape((2,3))
print arr

[[1 2 3]
 [4 5 6]]


In [37]:
print arr+arr

[[ 2  4  6]
 [ 8 10 12]]


In [38]:
print arr*arr

[[ 1  4  9]
 [16 25 36]]


In [40]:
print 3 *arr

[[ 3  6  9]
 [12 15 18]]


In [44]:
print arr - arr

[[0 0 0]
 [0 0 0]]


In [46]:
print 1.0/arr

[[ 1.          0.5         0.33333333]
 [ 0.25        0.2         0.16666667]]


In [47]:
print arr**3

[[  1   8  27]
 [ 64 125 216]]


### Index and Slice
基本的索引（Index）和切片（Slice）返回的是对原有数组的引用（视图，view），而不是拷贝（copy）；修改视图会同时修改原数组。

In [50]:
arr= np.arange(12).reshape((3,4))
print arr

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [51]:
print arr[1:,1:]

[[ 5  6  7]
 [ 9 10 11]]


In [52]:
print arr[1:-1,1:-1]

[[5 6]]


### boolean indexing


In [57]:
weeks=np.array(['mon','tus','wed','thu','fri','sat','sun'],dtype='S3')
print weeks.dtype

|S3


In [66]:
holiday=((weeks=='sun') | (weeks=='sat'))
print holiday

[False False False False False  True  True]


In [69]:
salary=np.ones((7,5))*500
print salary

[[ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]]


In [70]:
salary[holiday]=800
print salary

[[ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 500.  500.  500.  500.  500.]
 [ 800.  800.  800.  800.  800.]
 [ 800.  800.  800.  800.  800.]]


In [73]:
data=np.arange(49).reshape((7,7))
data[(data>20) | (data<10)]=0
print data

[[ 0  0  0  0  0  0  0]
 [ 0  0  0 10 11 12 13]
 [14 15 16 17 18 19 20]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]]


### Fancy indexing

In [85]:
data=np.arange(49).reshape(7,7)
print data
print data.ndim
print data.shape
print data.size
print data.itemsize

[[ 0  1  2  3  4  5  6]
 [ 7  8  9 10 11 12 13]
 [14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27]
 [28 29 30 31 32 33 34]
 [35 36 37 38 39 40 41]
 [42 43 44 45 46 47 48]]
2
(7, 7)
49
4


In [86]:
print data.ndim

2


In [87]:
print data.shape

(7, 7)


In [88]:
print data.size

49


In [89]:
print data.itemsize

4


In [91]:
print data[[1,3,5]]

[[ 7  8  9 10 11 12 13]
 [21 22 23 24 25 26 27]
 [35 36 37 38 39 40 41]]


In [92]:
print data[[2,4],[2,4]]

[16 32]


In [94]:
print data[[2,4]][:,[2,4]]

[[16 18]
 [30 32]]


### Element-wise array function
abs,fabs,sqrt,square,exp,sign,ceil,floor,rint,sin,cos  
add, subtract, multiply, power

In [100]:
x = np.arange(10)
print x

[0 1 2 3 4 5 6 7 8 9]


In [101]:
print np.sqrt(x)

[ 0.          1.          1.41421356  1.73205081  2.          2.23606798
  2.44948974  2.64575131  2.82842712  3.        ]


### Expressing Conditional Logic as Array Operations
下面两个例子分别用纯Python的列表推导和循环实现"按条件取值"；NumPy中等价的向量化写法是 `np.where(cond, xarr, yarr)`。


In [102]:
xarr=np.array([1,3,5,7,9])
yarr=np.array([2,4,6,8,10])
cond=np.array([True,True,False,False,True])
result=[(x if c else y) for x,y,c in zip(xarr,yarr,cond)]
print result

[1, 3, 6, 8, 9]


In [103]:
result2=[]
for i in range(5):
    if cond[i]:
        result2.append(xarr[i])
    else:
        result2.append(yarr[i])
aresult2=np.array(result2)
print aresult2
        

[1 3 6 8 9]


### Mathematical and Statistical Methods
mean,sum,cumsum,cumprod

In [104]:
arr=np.arange(25).reshape(5,5)
print arr

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [106]:
print arr.mean()

12.0


In [108]:
print arr.mean(axis=0)

[ 10.  11.  12.  13.  14.]


In [109]:
print arr.mean(axis=1)

[  2.   7.  12.  17.  22.]


In [107]:
print arr.sum()

300


In [111]:
arr.sum(axis=0)

array([50, 55, 60, 65, 70])

In [112]:
arr.sum(axis=1)

array([ 10,  35,  60,  85, 110])

In [114]:
arr.cumsum()

array([  0,   1,   3,   6,  10,  15,  21,  28,  36,  45,  55,  66,  78,
        91, 105, 120, 136, 153, 171, 190, 210, 231, 253, 276, 300])

In [115]:
arr.cumsum(axis=0)

array([[ 0,  1,  2,  3,  4],
       [ 5,  7,  9, 11, 13],
       [15, 18, 21, 24, 27],
       [30, 34, 38, 42, 46],
       [50, 55, 60, 65, 70]])

In [116]:
arr.cumsum(axis=1)

array([[  0,   1,   3,   6,  10],
       [  5,  11,  18,  26,  35],
       [ 10,  21,  33,  46,  60],
       [ 15,  31,  48,  66,  85],
       [ 20,  41,  63,  86, 110]])

In [117]:
arr.cumprod(axis=0)

array([[     0,      1,      2,      3,      4],
       [     0,      6,     14,     24,     36],
       [     0,     66,    168,    312,    504],
       [     0,   1056,   2856,   5616,   9576],
       [     0,  22176,  62832, 129168, 229824]])

In [118]:
arr.prod()

0

### boolean arrays

In [120]:
x=np.arange(10)
print x
print (x>3).sum()

[0 1 2 3 4 5 6 7 8 9]
6


In [122]:
bools=x>8
print bools

[False False False False False False False False False  True]


In [123]:
bools.any()

True

In [124]:
bools.all()

False

### Sort

In [134]:
data=np.random.uniform(1,100,(5,5))
data=data.astype('i4')
print data

[[92 20 26 10 71]
 [42 49 70 80 87]
 [22 40 19  7 55]
 [80 49  2 46 77]
 [46 11 47 28 70]]


In [135]:
data1=np.array(data)
data1.sort()
print data1

[[10 20 26 71 92]
 [42 49 70 80 87]
 [ 7 19 22 40 55]
 [ 2 46 49 77 80]
 [11 28 46 47 70]]


In [145]:
data2=np.array(data)
data2.sort(axis=0)
print data2
print data2.dtype

[[22 11  2  7 55]
 [42 20 19 10 70]
 [46 40 26 28 71]
 [80 49 47 46 77]
 [92 49 70 80 87]]
int32


In [140]:
np.savetxt('nddata.txt',data2,delimiter=',')

In [144]:
data3=np.loadtxt('nddata.txt',delimiter=',')
print data3
print data3.dtype

[[ 22.  11.   2.   7.  55.]
 [ 42.  20.  19.  10.  70.]
 [ 46.  40.  26.  28.  71.]
 [ 80.  49.  47.  46.  77.]
 [ 92.  49.  70.  80.  87.]]
float64


### Linear Algebra
diag,dot,trace,det,eig,inv,pinv,qr,svd,solve,lstsq

In [146]:
x=np.array([[1,2,3],[4,5,6]])
y=np.ones(3)
print x
print y

[[1 2 3]
 [4 5 6]]
[ 1.  1.  1.]


In [150]:
print    x.dot(y.T)

[  6.  15.]


In [155]:
x=np.array([[2,0],[0,4]])
print np.linalg.inv(x)

[[ 0.5   0.  ]
 [ 0.    0.25]]


In [157]:
x=np.array([[2,0],[0,0]])
print np.linalg.pinv(x)

[[ 0.5  0. ]
 [ 0.   0. ]]


### Random Number Generation
seed, permutation, shuffle, rand, randint, randn, binomial, normal, beta, chisquare, gamma, uniform

In [158]:
samples=np.random.normal(size=(4,4))
print samples

[[ 0.47940341 -1.83851718 -0.1960076  -0.00774286]
 [ 0.99295095 -0.02483721 -0.89077153  0.1577324 ]
 [ 1.90411387  0.66627766 -0.47183003 -0.46493129]
 [ 1.94468379  0.3421416   0.24062656 -0.57212593]]
